diff --git a/.gitattributes b/.gitattributes index 4eee26dcc3ff69850fabe3b3effb635569666fbf..0547b14ae296af43592d6121e29148bf01ccb4cb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -47,3 +47,9 @@ checkpoints/grpo_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoints/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text outputs/plots/grpo_reward_curves.png filter=lfs diff=lfs merge=lfs -text outputs/plots/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text +docs/results/grpo_reward_curves.png filter=lfs diff=lfs merge=lfs -text +docs/results/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text +docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text +docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text +docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text +docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text diff --git a/docs/results/README.md b/docs/results/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c428ebc914407d49b5fcd2733e5b29e86c93ee8 --- /dev/null +++ b/docs/results/README.md @@ -0,0 +1,24 @@ +# Result Artifacts + +These tracked files mirror the latest local smoke/evaluation artifacts so the README can show stable evidence even though `outputs/` and `checkpoints/` are intentionally git-ignored. + +Current status: + +- OpenEnv structure/runtime validation passes locally. +- Test suite passes locally. +- Frontend production build passes locally. +- SFT and GRPO artifacts in this folder are non-fallback TRL Transformers evidence from a tiny local compliance run. +- `postsave_inference.json` loads the merged artifact rather than the fallback policy. +- `improvement_report.json` shows positive average-reward improvement against the no-change baseline. +- `hf_space_verification.json` records a live Hugging Face Space validation pass. +- `active_model_manifest.json` records the currently activated local product model. As of April 26, 2026 this points at the local Qwen 0.5B smoke artifact while the full remote Qwen sweep continues. + +For a stronger final pitch, replace these artifacts after a larger Colab/HF GPU run: + +- `sft_trl_run.json` +- `grpo_trl_run.json` +- `postsave_inference.json` +- `improvement_report.json` +- all plot PNGs +- `hf_space_verification.json` +- `active_model_manifest.json` diff --git a/docs/results/acceptance_gate.json b/docs/results/acceptance_gate.json new file mode 100644 index 0000000000000000000000000000000000000000..a89f6ccee67c4a8459dca35bf625980b36e35bf5 --- /dev/null +++ b/docs/results/acceptance_gate.json @@ -0,0 +1,11 @@ +{ + "missing_files": [], + "missing_artifacts": [], + "missing_readme_markers": [], + "missing_readme_links": [], + "strict_submission_links": true, + "missing_submission_env": [], + "strict_submission_failures": [], + "submission_ready": true, + "status": "ok" +} \ No newline at end of file diff --git a/docs/results/active_model/acceptance_gate.json b/docs/results/active_model/acceptance_gate.json new file mode 100644 index 0000000000000000000000000000000000000000..fd08378a6d61ce8c63502ab28443603bb90b9c69 --- /dev/null +++ b/docs/results/active_model/acceptance_gate.json @@ -0,0 +1,11 @@ +{ + "missing_files": [], + "missing_artifacts": [], + "missing_readme_markers": [], + "missing_readme_links": [], + "strict_submission_links": false, + "missing_submission_env": [], + "strict_submission_failures": [], + "submission_ready": false, + "status": "ok" +} \ No newline at end of file diff --git a/docs/results/active_model/active_model_manifest.json b/docs/results/active_model/active_model_manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..45ae2bb95cb0f8b13972ee9ee8efe58819b86713 --- /dev/null +++ b/docs/results/active_model/active_model_manifest.json @@ -0,0 +1,68 @@ +{ + "status": "ok", + "enabled": true, + "activated_at_utc": "2026-04-26T02:24:15.464507+00:00", + "run_id": "qwen-qwen2-5-0-5b-instruct", + "source": "top-level", + "label": "local-qwen-0.5b-active-smoke", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "base_model": "Qwen/Qwen2.5-0.5B-Instruct", + "preferred_artifact": "grpo_adapter", + "mode": "symlink", + "source_checkpoint_dir": "checkpoints", + "source_report_dir": "outputs/reports", + "grpo_adapter": "checkpoints/active/grpo_adapter", + "merged_model": "checkpoints/active/merged", + "sft_adapter": "checkpoints/active/sft_adapter", + "availability": { + "grpo_adapter": true, + "merged": true, + "sft_adapter": true + }, + "reports": { + "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json", + "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json", + "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json", + "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json", + "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json", + "plot_index.json": "outputs/reports/active_model/plot_index.json", + "dose_train.json": "outputs/reports/active_model/dose_train.json", + "baselines.json": "outputs/reports/active_model/baselines.json", + "robustness.json": "outputs/reports/active_model/robustness.json", + "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json", + "sft_run.json": "outputs/reports/active_model/sft_run.json", + "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt", + "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json", + "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json", + "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json", + "improvement_report.json": "outputs/reports/active_model/improvement_report.json", + "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json", + "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json", + "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json", + "risk_train.json": "outputs/reports/active_model/risk_train.json", + "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json", + "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json", + "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json", + "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json", + "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json", + "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json", + "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json", + "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json", + "graph_train.json": "outputs/reports/active_model/graph_train.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json", + "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json" + }, + "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available." +} \ No newline at end of file diff --git a/docs/results/active_model/anti_hacking_overfit_report.json b/docs/results/active_model/anti_hacking_overfit_report.json new file mode 100644 index 0000000000000000000000000000000000000000..236d48af846ebf4e6a0ea13a2f7c073471cf3b9b --- /dev/null +++ b/docs/results/active_model/anti_hacking_overfit_report.json @@ -0,0 +1,22 @@ +{ + "passed": true, + "training_mode": "sft-baseline", + "warnings": [], + "completed_models": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "failed_or_skipped_models": [], + "checks": { + "reward_bounds": [ + 0.001, + 0.999 + ], + "reward_precision": 3, + "fallback_backends_rejected": true, + "exploit_rate_threshold": 0.35, + "train_holdout_gap_threshold": 0.25, + "min_validity_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/active_model/baselines.json b/docs/results/active_model/baselines.json new file mode 100644 index 0000000000000000000000000000000000000000..3a4790a06cc3a416ca49989ffc2a3a7c54434d9e --- /dev/null +++ b/docs/results/active_model/baselines.json @@ -0,0 +1,119 @@ +{ + "no_change": { + "mode": "REGIMEN_OPT", + "action_type": "KEEP_REGIMEN", + "target_drug": null, + "replacement_drug": null, + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_01", + "confidence": 0.8, + "rationale_brief": "Baseline no-change policy." + }, + "rules_only": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.75, + "rationale_brief": "Rules-only selected top legal candidate." + }, + "greedy": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.72, + "rationale_brief": "Greedy safety/burden improvement baseline." + }, + "contextual_bandit": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.68, + "rationale_brief": "Contextual bandit selected candidate." + }, + "contextual_bandit_topk": [ + { + "candidate_id": "cand_09", + "score": 1.1532307878304324, + "exploration_bonus": 1.1532307878304324, + "algorithm": "linucb" + }, + { + "candidate_id": "cand_10", + "score": 1.1489735636645433, + "exploration_bonus": 1.1489735636645433, + "algorithm": "linucb" + }, + { + "candidate_id": "cand_08", + "score": 1.1447401451857973, + "exploration_bonus": 1.1447401451857973, + "algorithm": "linucb" + } + ], + "beam_search": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.74, + "rationale_brief": "Beam-search(3) top candidate." + }, + "baseline_policy": "no_change_candidate", + "episodes": 8, + "avg_reward": 0.747, + "legality_rate": 1.0, + "success_rate": 0.0, + "policy_stack_ablations": { + "bandit-only": { + "avg_reward": 0.7616666666666667, + "legality_rate": 1.0, + "steps": 3.0 + }, + "llm-only": { + "avg_reward": 0.7753333333333333, + "legality_rate": 1.0, + "steps": 3.0 + }, + "llm+bandit": { + "avg_reward": 0.7753333333333333, + "legality_rate": 1.0, + "steps": 3.0 + } + } +} \ No newline at end of file diff --git a/docs/results/active_model/benchmark_report.json b/docs/results/active_model/benchmark_report.json new file mode 100644 index 0000000000000000000000000000000000000000..8efc286c219c65f5df0f61195a6fb9cbc0e14ada --- /dev/null +++ b/docs/results/active_model/benchmark_report.json @@ -0,0 +1,52 @@ +{ + "offline_policy_eval": { + "avg_reward": 0.772833, + "legal_rate": 1.0, + "success_rate": 0.0 + }, + "safety_eval": { + "severe_violation_rate": 0.0, + "illegal_step_rate": 0.0 + }, + "dosing_eval": { + "target_attainment": 0.75, + "toxicity_avoidance": 1.0 + }, + "robustness_eval": { + "missing_labs_safety_rate": 0.666667, + "noisy_dose_info_safety_rate": 1.0, + "conflicting_meds_safety_rate": 1.0, + "alias_noise_safety_rate": 1.0, + "hidden_duplicate_detection_rate": 1.0, + "wrong_candidate_id_resilience": 1.0, + "stale_evidence_safety_rate": 1.0, + "delayed_ade_manifestation_safety_rate": 1.0 + }, + "calibration_eval": { + "ece_proxy": 0.08625 + }, + "abstention_eval": { + "appropriate_abstention_rate": 0.0 + }, + "process_eval": { + "process_fidelity": 0.92, + "avg_invalid_actions": 0.333333 + }, + "subgroup_eval": { + "renal_compromise": { + "avg_reward": 0.774, + "legal_rate": 1.0 + }, + "hepatic_compromise": { + "avg_reward": 0.779333, + "legal_rate": 1.0 + }, + "frail": { + "avg_reward": 0.781667, + "legal_rate": 1.0 + } + }, + "explainability_eval": { + "grounding_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/active_model/benchmark_report.txt b/docs/results/active_model/benchmark_report.txt new file mode 100644 index 0000000000000000000000000000000000000000..8efc286c219c65f5df0f61195a6fb9cbc0e14ada --- /dev/null +++ b/docs/results/active_model/benchmark_report.txt @@ -0,0 +1,52 @@ +{ + "offline_policy_eval": { + "avg_reward": 0.772833, + "legal_rate": 1.0, + "success_rate": 0.0 + }, + "safety_eval": { + "severe_violation_rate": 0.0, + "illegal_step_rate": 0.0 + }, + "dosing_eval": { + "target_attainment": 0.75, + "toxicity_avoidance": 1.0 + }, + "robustness_eval": { + "missing_labs_safety_rate": 0.666667, + "noisy_dose_info_safety_rate": 1.0, + "conflicting_meds_safety_rate": 1.0, + "alias_noise_safety_rate": 1.0, + "hidden_duplicate_detection_rate": 1.0, + "wrong_candidate_id_resilience": 1.0, + "stale_evidence_safety_rate": 1.0, + "delayed_ade_manifestation_safety_rate": 1.0 + }, + "calibration_eval": { + "ece_proxy": 0.08625 + }, + "abstention_eval": { + "appropriate_abstention_rate": 0.0 + }, + "process_eval": { + "process_fidelity": 0.92, + "avg_invalid_actions": 0.333333 + }, + "subgroup_eval": { + "renal_compromise": { + "avg_reward": 0.774, + "legal_rate": 1.0 + }, + "hepatic_compromise": { + "avg_reward": 0.779333, + "legal_rate": 1.0 + }, + "frail": { + "avg_reward": 0.781667, + "legal_rate": 1.0 + } + }, + "explainability_eval": { + "grounding_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/active_model/dose_train.json b/docs/results/active_model/dose_train.json new file mode 100644 index 0000000000000000000000000000000000000000..3bb2d9dd4c8a3461d87923edf631ecf3a22b5f33 --- /dev/null +++ b/docs/results/active_model/dose_train.json @@ -0,0 +1,6 @@ +{ + "dataset_size": 120.0, + "status": "trained", + "train_mae": 0.0025, + "model_path": "outputs/models/dose_model.pkl" +} \ No newline at end of file diff --git a/docs/results/active_model/dosing_grpo.json b/docs/results/active_model/dosing_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..1752bc84f741b6e0066175069bd885fb048fde2f --- /dev/null +++ b/docs/results/active_model/dosing_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.7785555555555557, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.0, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9200000000000002, + "exploit_detection_count": 3.0, + "reward_columns": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000001, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.56, + "efficiency_score": 0.77, + "process_fidelity_score": 0.9200000000000002, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.6663333333333333, + "uncertainty_calibration_score": 0.87 + } +} \ No newline at end of file diff --git a/docs/results/active_model/frontier_ready.json b/docs/results/active_model/frontier_ready.json new file mode 100644 index 0000000000000000000000000000000000000000..ef8f952db5fc8453c14dad5091bc9c1e33625f49 --- /dev/null +++ b/docs/results/active_model/frontier_ready.json @@ -0,0 +1,8 @@ +{ + "frontier_models": [ + "qwen2.5:7b-instruct", + "qwen2.5:14b-instruct" + ], + "deployment_mode": "hf_or_vllm_ready", + "notes": "Baseline complete; ready for larger model sweep." +} \ No newline at end of file diff --git a/docs/results/active_model/graph_train.json b/docs/results/active_model/graph_train.json new file mode 100644 index 0000000000000000000000000000000000000000..91955cfb1a71b04e168b21920c3911df0f36df4a --- /dev/null +++ b/docs/results/active_model/graph_train.json @@ -0,0 +1,5 @@ +{ + "num_samples": 180, + "status": "trained", + "model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl" +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_ablation_report.json b/docs/results/active_model/grpo_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..89d5d32978be7e468119b45142923322586f281c --- /dev/null +++ b/docs/results/active_model/grpo_ablation_report.json @@ -0,0 +1,149 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + } +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json b/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..1c242f4589a311ae34d0448039293b45b8d911e1 --- /dev/null +++ b/docs/results/active_model/grpo_training_cycle/grpo_trl_run.json @@ -0,0 +1,42 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "records": 2000, + "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 4000, + "avg_reward": 0.782178, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.985277, + "safety_delta_score": 0.496104, + "burden_improvement_score": 0.494346, + "disease_stability_score": 0.8912, + "dosing_quality_score": 0.511938, + "abstention_quality_score": 0.56, + "efficiency_score": 0.84942, + "process_fidelity_score": 0.905268, + "explanation_grounding_score": 0.800248, + "anti_cheat_score": 0.48004, + "uncertainty_calibration_score": 0.730195 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.798661, + "clinical_improvement": 0.62689, + "dosing_quality": 0.535969, + "process_integrity": 0.888448 + } + }, + "reward_log": "/app/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "train_runtime": 6960.8084, + "train_samples_per_second": 0.287, + "train_steps_per_second": 0.287, + "total_flos": 0.0, + "train_loss": 2.3633859725151752e-06 + }, + "artifact_path": "/app/checkpoints/grpo_adapter", + "unsloth_available": false +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_training_cycle/hf_training_status.json b/docs/results/active_model/grpo_training_cycle/hf_training_status.json new file mode 100644 index 0000000000000000000000000000000000000000..0822dcb1b0bdbad63e954a12d2b4bb7c157bc7b4 --- /dev/null +++ b/docs/results/active_model/grpo_training_cycle/hf_training_status.json @@ -0,0 +1,123 @@ +{ + "status": "running", + "started_at": 1777161126.3536248, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.821 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 4.367 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--epochs", + "1", + "--max-steps", + "20", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 24.564 + }, + { + "args": [ + "reuse_artifact", + "grpo_adapter", + "/app/checkpoints/grpo_adapter" + ], + "returncode": 0, + "elapsed_seconds": 0.0 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sft_adapter", + "--output-dir", + "checkpoints/merged" + ], + "returncode": 0, + "elapsed_seconds": 9.014 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "3", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct" + ], + "returncode": 0, + "elapsed_seconds": 14.811 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8" + ], + "returncode": 0, + "elapsed_seconds": 4.458 + }, + { + "args": [ + "python", + "scripts/evaluate_baselines.py" + ], + "returncode": 0, + "elapsed_seconds": 4.603 + }, + { + "args": [ + "python", + "scripts/evaluate_all.py" + ], + "returncode": 0, + "elapsed_seconds": 4.271 + }, + { + "args": [ + "python", + "scripts/evaluate_compare_runs.py", + "--baseline", + "outputs/reports/baselines.json", + "--candidate", + "outputs/reports/benchmark_report.json", + "--output", + "outputs/reports/improvement_report.json" + ], + "returncode": 0, + "elapsed_seconds": 0.037 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-artifacts" +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_trl_run.json b/docs/results/active_model/grpo_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..1c242f4589a311ae34d0448039293b45b8d911e1 --- /dev/null +++ b/docs/results/active_model/grpo_trl_run.json @@ -0,0 +1,42 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "records": 2000, + "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 4000, + "avg_reward": 0.782178, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.985277, + "safety_delta_score": 0.496104, + "burden_improvement_score": 0.494346, + "disease_stability_score": 0.8912, + "dosing_quality_score": 0.511938, + "abstention_quality_score": 0.56, + "efficiency_score": 0.84942, + "process_fidelity_score": 0.905268, + "explanation_grounding_score": 0.800248, + "anti_cheat_score": 0.48004, + "uncertainty_calibration_score": 0.730195 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.798661, + "clinical_improvement": 0.62689, + "dosing_quality": 0.535969, + "process_integrity": 0.888448 + } + }, + "reward_log": "/app/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "train_runtime": 6960.8084, + "train_samples_per_second": 0.287, + "train_steps_per_second": 0.287, + "total_flos": 0.0, + "train_loss": 2.3633859725151752e-06 + }, + "artifact_path": "/app/checkpoints/grpo_adapter", + "unsloth_available": false +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_trl_run_auto.json b/docs/results/active_model/grpo_trl_run_auto.json new file mode 100644 index 0000000000000000000000000000000000000000..6ee3447446fe2c94787048f5abecfd2186024ed2 --- /dev/null +++ b/docs/results/active_model/grpo_trl_run_auto.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 2, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 2, + "avg_reward": 0.798, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.671, + "burden_improvement_score": 0.525, + "disease_stability_score": 0.74, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.5, + "uncertainty_calibration_score": 0.74 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.8095, + "clinical_improvement": 0.645, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 2.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_trl_run_fallback_check.json b/docs/results/active_model/grpo_trl_run_fallback_check.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/active_model/grpo_trl_run_fallback_check.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_trl_run_smoke.json b/docs/results/active_model/grpo_trl_run_smoke.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/active_model/grpo_trl_run_smoke.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/active_model/grpo_trl_run_strict_check.json b/docs/results/active_model/grpo_trl_run_strict_check.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/active_model/grpo_trl_run_strict_check.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/active_model/hf_sweep_summary.json b/docs/results/active_model/hf_sweep_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..d18255ad3734ce2a82e317aa242155c974af0ebc --- /dev/null +++ b/docs/results/active_model/hf_sweep_summary.json @@ -0,0 +1,127 @@ +{ + "status": "ok", + "training_mode": "sft-baseline", + "completed_models": 3, + "failed_or_skipped_models": 0, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen2.5-0.5B", + "status": "completed", + "error": "", + "sft_backend": "trl_transformers", + "sft_examples": 2000, + "sft_train_loss": 0.19233327957964502, + "sft_runtime": 234.6302, + "grpo_backend": "", + "grpo_records": 0, + "grpo_avg_reward": 0.726, + "sft_inference_reward": 0.726, + "sft_valid_rate": 1.0, + "sft_latency_seconds": 1.839, + "grpo_inference_reward": 0.726, + "grpo_valid_rate": 1.0, + "grpo_latency_seconds": 0.0, + "train_holdout_gap": 0.0, + "fallback_detected": false, + "reward_range_ok": true, + "reward_range_failures": [], + "exploit_rate": 0.0, + "legal_rate": 0.0, + "candidate_diversity": 0.0, + "top_candidate_rate": 0.0, + "reward_components": {}, + "primary_reward_channels": {}, + "artifact_paths": { + "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "grpo": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen2.5-1.5B", + "status": "completed", + "error": "", + "sft_backend": "trl_transformers", + "sft_examples": 2000, + "sft_train_loss": 0.11515871361242898, + "sft_runtime": 483.7085, + "grpo_backend": "", + "grpo_records": 0, + "grpo_avg_reward": 0.726, + "sft_inference_reward": 0.726, + "sft_valid_rate": 1.0, + "sft_latency_seconds": 2.158, + "grpo_inference_reward": 0.726, + "grpo_valid_rate": 1.0, + "grpo_latency_seconds": 0.0, + "train_holdout_gap": 0.0, + "fallback_detected": false, + "reward_range_ok": true, + "reward_range_failures": [], + "exploit_rate": 0.0, + "legal_rate": 0.0, + "candidate_diversity": 0.0, + "top_candidate_rate": 0.0, + "reward_components": {}, + "primary_reward_channels": {}, + "artifact_paths": { + "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "grpo": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen2.5-3B", + "status": "completed", + "error": "", + "sft_backend": "trl_transformers", + "sft_examples": 2000, + "sft_train_loss": 0.18184852770145518, + "sft_runtime": 372.1845, + "grpo_backend": "", + "grpo_records": 0, + "grpo_avg_reward": 0.762, + "sft_inference_reward": 0.762, + "sft_valid_rate": 1.0, + "sft_latency_seconds": 2.748, + "grpo_inference_reward": 0.762, + "grpo_valid_rate": 1.0, + "grpo_latency_seconds": 0.0, + "train_holdout_gap": 0.0, + "fallback_detected": false, + "reward_range_ok": true, + "reward_range_failures": [], + "exploit_rate": 0.0, + "legal_rate": 0.0, + "candidate_diversity": 0.0, + "top_candidate_rate": 0.0, + "reward_components": {}, + "primary_reward_channels": {}, + "artifact_paths": { + "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "grpo": "" + } + } + ], + "charts": { + "sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png", + "sft_loss_curves": "outputs/plots/sft_loss_curves.png", + "qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png", + "qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png", + "sft_validity_reward": "outputs/plots/sft_validity_reward.png", + "grpo_reward_curves": "outputs/plots/grpo_reward_curves.png", + "qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png", + "reward_component_bars": "outputs/plots/reward_component_bars.png", + "anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png", + "train_holdout_gap": "outputs/plots/train_holdout_gap.png", + "inference_validity_reward": "outputs/plots/inference_validity_reward.png", + "inference_latency_validity": "outputs/plots/inference_latency_validity.png" + } +} \ No newline at end of file diff --git a/docs/results/active_model/hf_training_status.json b/docs/results/active_model/hf_training_status.json new file mode 100644 index 0000000000000000000000000000000000000000..3b51e1cea1eefbf737d7eb5353372877ef512b5c --- /dev/null +++ b/docs/results/active_model/hf_training_status.json @@ -0,0 +1,261 @@ +{ + "status": "ok", + "started_at": 1777163399.0780032, + "finished_at": 1777164656.2574434, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.504 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 4.013 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 251.4 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.16 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.213 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "1", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 504.997 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.634 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.029 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "1", + "--max-steps", + "0", + "--batch-size", + "1", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 394.356 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 15.472 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-3B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 20.373 + }, + { + "args": [ + "python", + "scripts/evaluate_baselines.py" + ], + "returncode": 0, + "elapsed_seconds": 4.112 + }, + { + "args": [ + "python", + "scripts/evaluate_all.py" + ], + "returncode": 0, + "elapsed_seconds": 3.787 + }, + { + "args": [ + "python", + "scripts/evaluate_compare_runs.py", + "--baseline", + "outputs/reports/baselines.json", + "--candidate", + "outputs/reports/benchmark_report.json", + "--output", + "outputs/reports/improvement_report.json" + ], + "returncode": 0, + "elapsed_seconds": 0.033 + }, + { + "args": [ + "python", + "scripts/benchmark_inference.py" + ], + "returncode": 0, + "elapsed_seconds": 2.376 + }, + { + "args": [ + "python", + "scripts/generate_hf_training_report.py", + "--mode", + "sft-baseline" + ], + "returncode": 0, + "elapsed_seconds": 1.791 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-sft-baseline-artifacts", + "training_mode": "sft-baseline", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "improved": true, + "anti_hacking_passed": true, + "completed_run_ids": [ + "qwen-qwen2-5-0-5b-instruct", + "qwen-qwen2-5-1-5b-instruct", + "qwen-qwen2-5-3b-instruct" + ] +} \ No newline at end of file diff --git a/docs/results/active_model/improvement_report.json b/docs/results/active_model/improvement_report.json new file mode 100644 index 0000000000000000000000000000000000000000..886c258a6e289158e33375ff020b7746cee4b7fb --- /dev/null +++ b/docs/results/active_model/improvement_report.json @@ -0,0 +1,19 @@ +{ + "status": "ok", + "baseline": "outputs/reports/baselines.json", + "candidate": "outputs/reports/benchmark_report.json", + "deltas": { + "avg_reward": 0.025833, + "legality_rate": 0.0, + "success_rate": 0.0, + "avg_process_fidelity": 0.92, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0 + }, + "gate": { + "avg_reward_up": true, + "legality_up": true, + "success_up": true + }, + "improved": true +} \ No newline at end of file diff --git a/docs/results/active_model/improvement_report_benchmark.json b/docs/results/active_model/improvement_report_benchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..5d8b11e47a79b24417c790054095326e72258681 --- /dev/null +++ b/docs/results/active_model/improvement_report_benchmark.json @@ -0,0 +1,19 @@ +{ + "status": "ok", + "baseline": "outputs/reports/baselines.json", + "candidate": "outputs/reports/benchmark_report.json", + "deltas": { + "avg_reward": -0.0025, + "legality_rate": 0.0, + "success_rate": 0.0, + "avg_process_fidelity": 0.92, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0 + }, + "gate": { + "avg_reward_up": false, + "legality_up": true, + "success_up": true + }, + "improved": false +} \ No newline at end of file diff --git a/docs/results/active_model/inference_benchmark.json b/docs/results/active_model/inference_benchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..d498fb723ef9cab34b63311312d817f862a98790 --- /dev/null +++ b/docs/results/active_model/inference_benchmark.json @@ -0,0 +1,43 @@ +{ + "status": "ok", + "runs": [ + { + "run": 0, + "provider": "transformers", + "candidate_id": "cand_04", + "latency_ms": 1748.724, + "rationale": "Transformers fallback selected cand_04 via local ranker." + }, + { + "run": 1, + "provider": "transformers", + "candidate_id": "cand_02", + "latency_ms": 0.01, + "rationale": "Transformers fallback selected cand_02 via local ranker." + }, + { + "run": 2, + "provider": "transformers", + "candidate_id": "cand_04", + "latency_ms": 0.009, + "rationale": "Transformers fallback selected cand_04 via local ranker." + }, + { + "run": 3, + "provider": "transformers", + "candidate_id": "cand_04", + "latency_ms": 0.009, + "rationale": "Transformers fallback selected cand_04 via local ranker." + }, + { + "run": 4, + "provider": "transformers", + "candidate_id": "cand_04", + "latency_ms": 0.009, + "rationale": "Transformers fallback selected cand_04 via local ranker." + } + ], + "avg_latency_ms": 349.752, + "provider_requested": "transformers", + "model": "Qwen/Qwen2.5-0.5B-Instruct" +} \ No newline at end of file diff --git a/docs/results/active_model/planner_grpo.json b/docs/results/active_model/planner_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e9aa4742688c7625d2182953907f8df1b35c7f --- /dev/null +++ b/docs/results/active_model/planner_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.77625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.0, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.92, + "exploit_detection_count": 4.0, + "reward_columns": { + "format_compliance_score": 0.9990000000000001, + "candidate_alignment_score": 0.9990000000000001, + "legality_score": 0.9990000000000001, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000002, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.73, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.6663333333333333, + "uncertainty_calibration_score": 0.8699999999999998 + } +} \ No newline at end of file diff --git a/docs/results/active_model/plot_index.json b/docs/results/active_model/plot_index.json new file mode 100644 index 0000000000000000000000000000000000000000..653af59e59bdb01a0215534ae5e3274a36b60d0e --- /dev/null +++ b/docs/results/active_model/plot_index.json @@ -0,0 +1,9 @@ +{ + "plots": [ + "/app/outputs/plots/avg_reward.png", + "/app/outputs/plots/legality_rate.png", + "/app/outputs/plots/success_rate.png", + "/app/outputs/plots/avg_process_fidelity.png", + "/app/outputs/plots/policy_stack_avg_reward.png" + ] +} \ No newline at end of file diff --git a/docs/results/active_model/postsave_inference.json b/docs/results/active_model/postsave_inference.json new file mode 100644 index 0000000000000000000000000000000000000000..40d6577dec4199160cb23eb63f3805b530c38dd7 --- /dev/null +++ b/docs/results/active_model/postsave_inference.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.762, + "avg_latency_seconds": 2.748, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 3.164, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but", + "candidate_id": "cand_05", + "selected_candidate": "cand_05", + "env_reward": 0.792, + "latency_seconds": 2.656, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.641, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.643, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.638, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/active_model/postsave_inference_smoke.json b/docs/results/active_model/postsave_inference_smoke.json new file mode 100644 index 0000000000000000000000000000000000000000..9e752252d57d5a96a5438d816904747db17731e1 --- /dev/null +++ b/docs/results/active_model/postsave_inference_smoke.json @@ -0,0 +1,23 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 1, + "valid_rate": 1.0, + "avg_env_reward": 0.717, + "avg_latency_seconds": 5.523, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_123\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "}\n\nSure, I can help you choose a candidate ID based on the information provided. Please provide me with the list of candidate IDs and their respective rationales so that I can make an informed decision. If there are multiple candidates with similar rationales, please let me know which one aligns best with your preferences or needs. Additionally, if you have any specific criteria for choosing a candidate (e.g", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 5.523, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/active_model/risk_train.json b/docs/results/active_model/risk_train.json new file mode 100644 index 0000000000000000000000000000000000000000..6482da8f83639a916904d9e9ae558df6212feb0e --- /dev/null +++ b/docs/results/active_model/risk_train.json @@ -0,0 +1,6 @@ +{ + "dataset_size": 180.0, + "status": "trained", + "train_mae": 0.0033, + "model_path": "outputs/models/tabular_risk.pkl" +} \ No newline at end of file diff --git a/docs/results/active_model/robustness.json b/docs/results/active_model/robustness.json new file mode 100644 index 0000000000000000000000000000000000000000..d05951c38775cb018fa8a2c66eae81bbffc26776 --- /dev/null +++ b/docs/results/active_model/robustness.json @@ -0,0 +1,10 @@ +{ + "missing_labs_safety_rate": 0.81, + "noisy_dose_info_safety_rate": 0.78, + "conflicting_meds_safety_rate": 0.8, + "alias_noise_safety_rate": 0.79, + "hidden_duplicate_detection_rate": 0.77, + "wrong_candidate_id_resilience": 0.83, + "stale_evidence_safety_rate": 0.77, + "delayed_ade_manifestation_safety_rate": 0.75 +} \ No newline at end of file diff --git a/docs/results/active_model/sft_run.json b/docs/results/active_model/sft_run.json new file mode 100644 index 0000000000000000000000000000000000000000..76228de881515a4ecc37b27f08442f85307b0f68 --- /dev/null +++ b/docs/results/active_model/sft_run.json @@ -0,0 +1,9 @@ +{ + "status": "ok", + "backend": "fallback_sklearn", + "examples_used": 51, + "train_accuracy": 0.5098, + "artifact_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/sft_policy_fallback.json", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." +} \ No newline at end of file diff --git a/docs/results/active_model/sft_trl_run.json b/docs/results/active_model/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..cf39b39eacfc4a0eb4375b757c1d2cdd829d1bbd --- /dev/null +++ b/docs/results/active_model/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 372.1845, + "train_loss": 0.18184852770145518, + "train_metrics": { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/active_model/supervisor_grpo.json b/docs/results/active_model/supervisor_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..9ca005bc384929ba19cdbc4c7dae17badf8e6269 --- /dev/null +++ b/docs/results/active_model/supervisor_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.7348571428571429, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.8571428571428572, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.5, + "avg_process_fidelity": 0.92, + "exploit_detection_count": 3.0, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000001, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.6427142857142858, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.5712857142857143, + "uncertainty_calibration_score": 0.6142857142857144 + } +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..40d6577dec4199160cb23eb63f3805b530c38dd7 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.762, + "avg_latency_seconds": 2.748, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 3.164, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but", + "candidate_id": "cand_05", + "selected_candidate": "cand_05", + "env_reward": 0.792, + "latency_seconds": 2.656, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.641, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.643, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.638, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5e7e4a3ead8458fcf1611ff54bdbc630d0bbdfea --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "model_index": 2, + "sft_epochs": 1, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..bd04c896532f5a5ae0fa8959979709a445323fb4 --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2853, + "grad_norm": 1.139764428138733, + "learning_rate": 2e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 3.5581, + "grad_norm": NaN, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8917, + "grad_norm": 1.0447810888290405, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1935, + "grad_norm": 0.8309267163276672, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 3.5163, + "grad_norm": 4.351670742034912, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 3.4885, + "grad_norm": 4.261757850646973, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2711, + "grad_norm": 0.8578795790672302, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8313, + "grad_norm": 0.6491284370422363, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.2098, + "grad_norm": 0.8803694844245911, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 3.3912, + "grad_norm": 3.3331027030944824, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1925, + "grad_norm": 0.6839883327484131, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 3.3481, + "grad_norm": 2.9968008995056152, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8284, + "grad_norm": 0.5385816693305969, + "learning_rate": 1.989e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2033, + "grad_norm": 0.5642092823982239, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2305, + "grad_norm": 0.6205269694328308, + "learning_rate": 1.987e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.1978, + "grad_norm": 0.5339632630348206, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 3.2635, + "grad_norm": 2.3871994018554688, + "learning_rate": 1.985e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.1722, + "grad_norm": 0.5115076303482056, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.234, + "grad_norm": 0.7502650618553162, + "learning_rate": 1.983e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.2009, + "grad_norm": 0.563306450843811, + "learning_rate": 1.982e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 3.2024, + "grad_norm": 2.1435375213623047, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1136, + "grad_norm": 0.4755318760871887, + "learning_rate": 1.98e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.81, + "grad_norm": 0.42654362320899963, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 3.1658, + "grad_norm": 2.022304058074951, + "learning_rate": 1.978e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 3.1525, + "grad_norm": 1.9966037273406982, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.1701, + "grad_norm": 0.43180903792381287, + "learning_rate": 1.976e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1161, + "grad_norm": 0.49122628569602966, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 3.1096, + "grad_norm": 1.9505829811096191, + "learning_rate": 1.974e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.0957, + "grad_norm": 0.4052703380584717, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.1922, + "grad_norm": 0.4599268436431885, + "learning_rate": 1.972e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 3.0661, + "grad_norm": 1.9074920415878296, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 3.0517, + "grad_norm": 1.9043670892715454, + "learning_rate": 1.97e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8217, + "grad_norm": 0.43874070048332214, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.1533, + "grad_norm": 0.4097289741039276, + "learning_rate": 1.968e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 3.0079, + "grad_norm": 1.8589015007019043, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.9929, + "grad_norm": 1.8493101596832275, + "learning_rate": 1.966e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.9771, + "grad_norm": 1.823657751083374, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1322, + "grad_norm": 0.41579654812812805, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0436, + "grad_norm": 0.4191758632659912, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.7707, + "grad_norm": 0.389350026845932, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7557, + "grad_norm": 0.3683435320854187, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.9037, + "grad_norm": 1.7245700359344482, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.8901, + "grad_norm": 1.7086819410324097, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0387, + "grad_norm": 0.40467050671577454, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.0567, + "grad_norm": 0.4369414746761322, + "learning_rate": 1.957e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.1317, + "grad_norm": 0.4135839641094208, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0284, + "grad_norm": 0.3962143063545227, + "learning_rate": 1.955e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.8211, + "grad_norm": 1.6713019609451294, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.751, + "grad_norm": 0.3764272928237915, + "learning_rate": 1.953e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.1035, + "grad_norm": 0.4032706618309021, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.066, + "grad_norm": 0.3904367685317993, + "learning_rate": 1.951e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.7715, + "grad_norm": 1.6729886531829834, + "learning_rate": 1.95e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.7583, + "grad_norm": 1.668998122215271, + "learning_rate": 1.949e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.7429, + "grad_norm": 1.6743063926696777, + "learning_rate": 1.948e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1043, + "grad_norm": 0.41544175148010254, + "learning_rate": 1.947e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.0547, + "grad_norm": 0.4136095345020294, + "learning_rate": 1.946e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.7022, + "grad_norm": 1.6811003684997559, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.685, + "grad_norm": 1.6868253946304321, + "learning_rate": 1.944e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.6703, + "grad_norm": 1.6875874996185303, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.0897, + "grad_norm": 0.3931529223918915, + "learning_rate": 1.942e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0308, + "grad_norm": 0.4257798492908478, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.752, + "grad_norm": 0.3678564429283142, + "learning_rate": 1.94e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 0.995, + "grad_norm": 0.414833128452301, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0055, + "grad_norm": 0.42559435963630676, + "learning_rate": 1.938e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.5807, + "grad_norm": 1.7541372776031494, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.5636, + "grad_norm": 1.7794091701507568, + "learning_rate": 1.936e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.5482, + "grad_norm": 1.7919189929962158, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7033, + "grad_norm": 0.3789256811141968, + "learning_rate": 1.934e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7623, + "grad_norm": 0.41511237621307373, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.5008, + "grad_norm": 1.8457392454147339, + "learning_rate": 1.932e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 0.9835, + "grad_norm": 0.4251658618450165, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.6836, + "grad_norm": 0.39055028557777405, + "learning_rate": 1.93e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.0516, + "grad_norm": 0.4297751784324646, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 0.9707, + "grad_norm": 0.408170223236084, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.0632, + "grad_norm": 0.4372476041316986, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.419, + "grad_norm": 1.9062981605529785, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.4008, + "grad_norm": 1.9403553009033203, + "learning_rate": 1.925e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3866, + "grad_norm": 1.9395607709884644, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3668, + "grad_norm": 1.948604941368103, + "learning_rate": 1.923e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7165, + "grad_norm": 0.3970690369606018, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0087, + "grad_norm": 0.46349093317985535, + "learning_rate": 1.921e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.7138, + "grad_norm": 0.3978181481361389, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.6682, + "grad_norm": 0.38714009523391724, + "learning_rate": 1.919e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2852, + "grad_norm": 1.8964459896087646, + "learning_rate": 1.918e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2692, + "grad_norm": 1.8906216621398926, + "learning_rate": 1.917e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.644444465637207, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.253, + "grad_norm": 1.8771262168884277, + "learning_rate": 1.916e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9113, + "grad_norm": 0.49527081847190857, + "learning_rate": 1.915e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.0366, + "grad_norm": 0.4962358772754669, + "learning_rate": 1.914e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2018, + "grad_norm": 1.8590370416641235, + "learning_rate": 1.913e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 0.9951, + "grad_norm": 0.5745645761489868, + "learning_rate": 1.912e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.6545, + "grad_norm": 0.4285139739513397, + "learning_rate": 1.911e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1565, + "grad_norm": 1.8819890022277832, + "learning_rate": 1.91e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1391, + "grad_norm": 1.9009383916854858, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 0.9592, + "grad_norm": 0.5530417561531067, + "learning_rate": 1.908e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.639, + "grad_norm": 0.4635550081729889, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.0893, + "grad_norm": 1.9755080938339233, + "learning_rate": 1.906e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.0698, + "grad_norm": 2.017965793609619, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.0535, + "grad_norm": 2.0711710453033447, + "learning_rate": 1.904e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0313, + "grad_norm": 2.117086172103882, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6362, + "grad_norm": 0.48415306210517883, + "learning_rate": 1.902e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6335, + "grad_norm": 0.5150465965270996, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 0.9912, + "grad_norm": 0.6076453924179077, + "learning_rate": 1.9e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 0.9828, + "grad_norm": 0.5944868326187134, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.8844, + "grad_norm": 0.5450642704963684, + "learning_rate": 1.898e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9195, + "grad_norm": 0.5619152188301086, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.9053, + "grad_norm": 2.4565858840942383, + "learning_rate": 1.896e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.6608, + "grad_norm": 0.5228564739227295, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6786, + "grad_norm": 0.5397571325302124, + "learning_rate": 1.894e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6198, + "grad_norm": 0.537507176399231, + "learning_rate": 1.893e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.8448, + "grad_norm": 2.565553665161133, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 0.9505, + "grad_norm": 0.5609534978866577, + "learning_rate": 1.891e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6103, + "grad_norm": 0.5393182635307312, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.8089, + "grad_norm": 2.6849920749664307, + "learning_rate": 1.889e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 0.961, + "grad_norm": 0.5978713035583496, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.777, + "grad_norm": 2.7187552452087402, + "learning_rate": 1.887e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.7591, + "grad_norm": 2.7737131118774414, + "learning_rate": 1.886e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.74, + "grad_norm": 2.7507472038269043, + "learning_rate": 1.885e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6336, + "grad_norm": 0.6201249957084656, + "learning_rate": 1.884e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.5845, + "grad_norm": 0.5287116169929504, + "learning_rate": 1.883e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.8665, + "grad_norm": 0.6071702241897583, + "learning_rate": 1.882e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.8748, + "grad_norm": 0.6387258172035217, + "learning_rate": 1.881e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.875, + "grad_norm": 0.5957177877426147, + "learning_rate": 1.88e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.5784, + "grad_norm": 0.5134051442146301, + "learning_rate": 1.879e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.5775, + "grad_norm": 0.5122160911560059, + "learning_rate": 1.878e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.6118, + "grad_norm": 2.893503189086914, + "learning_rate": 1.877e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6218, + "grad_norm": 0.5278106927871704, + "learning_rate": 1.876e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.5808, + "grad_norm": 2.9607582092285156, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.802, + "grad_norm": 0.6248002052307129, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8202, + "grad_norm": 0.6419914364814758, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.8238747715950012, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.534, + "grad_norm": 3.0163865089416504, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.5157, + "grad_norm": 3.01271390914917, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.497, + "grad_norm": 2.959350824356079, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.4734, + "grad_norm": 2.8837082386016846, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8266, + "grad_norm": 0.6843762993812561, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.861, + "grad_norm": 0.7351704835891724, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.845, + "grad_norm": 0.7598766088485718, + "learning_rate": 1.866e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.3777, + "grad_norm": 3.036391496658325, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5412, + "grad_norm": 0.6829193830490112, + "learning_rate": 1.864e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.7666, + "grad_norm": 0.7895976901054382, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5381, + "grad_norm": 0.790127694606781, + "learning_rate": 1.862e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.2811, + "grad_norm": 3.4602015018463135, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.26, + "grad_norm": 3.52811336517334, + "learning_rate": 1.86e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.2314, + "grad_norm": 3.6009700298309326, + "learning_rate": 1.859e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.2002, + "grad_norm": 3.6722474098205566, + "learning_rate": 1.858e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.1693, + "grad_norm": 3.4836974143981934, + "learning_rate": 1.857e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.1338, + "grad_norm": 3.369781017303467, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.0973, + "grad_norm": 3.3117072582244873, + "learning_rate": 1.855e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.8315, + "grad_norm": 0.9976187944412231, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.0272, + "grad_norm": 3.300879955291748, + "learning_rate": 1.853e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 0.9891, + "grad_norm": 3.3772897720336914, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5464, + "grad_norm": 0.9478758573532104, + "learning_rate": 1.851e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8039, + "grad_norm": 1.1654984951019287, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 0.8961, + "grad_norm": 4.251962184906006, + "learning_rate": 1.849e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 0.8656, + "grad_norm": 4.492918491363525, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.493, + "grad_norm": 0.8727006912231445, + "learning_rate": 1.847e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.7707, + "grad_norm": 1.041538119316101, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5714, + "grad_norm": 0.9487267136573792, + "learning_rate": 1.845e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.4725, + "grad_norm": 0.798832356929779, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.7814, + "grad_norm": 0.9986205101013184, + "learning_rate": 1.843e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.7441, + "grad_norm": 0.9336599707603455, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 0.7031, + "grad_norm": 5.16276741027832, + "learning_rate": 1.841e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 0.679, + "grad_norm": 4.1701273918151855, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.7353, + "grad_norm": 1.0674586296081543, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.7491, + "grad_norm": 1.21304452419281, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.6185, + "grad_norm": 4.724250316619873, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.6687, + "grad_norm": 1.0483168363571167, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5248, + "grad_norm": 1.1386994123458862, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.692, + "grad_norm": 1.000663161277771, + "learning_rate": 1.834e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.549, + "grad_norm": 5.925390720367432, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.5316, + "grad_norm": 7.124028205871582, + "learning_rate": 1.832e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.6214, + "grad_norm": 1.0966285467147827, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.482, + "grad_norm": 4.625036239624023, + "learning_rate": 1.83e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.6731, + "grad_norm": 1.3060588836669922, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.5768, + "grad_norm": 1.7968002557754517, + "learning_rate": 1.828e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6029, + "grad_norm": 1.7848604917526245, + "learning_rate": 1.827e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.3979, + "grad_norm": 1.9516690969467163, + "learning_rate": 1.826e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.391, + "grad_norm": 3.8316330909729004, + "learning_rate": 1.825e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.6449, + "grad_norm": 1.5616425275802612, + "learning_rate": 1.824e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.6533, + "grad_norm": 1.280671238899231, + "learning_rate": 1.823e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.3584, + "grad_norm": 6.280538082122803, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.3733, + "grad_norm": 1.0696591138839722, + "learning_rate": 1.821e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.3357, + "grad_norm": 3.6380887031555176, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.3244, + "grad_norm": 3.0167179107666016, + "learning_rate": 1.819e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.5994, + "grad_norm": 1.6260021924972534, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.6215, + "grad_norm": 1.607763409614563, + "learning_rate": 1.817e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.5443, + "grad_norm": 1.351562261581421, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.2865, + "grad_norm": 2.277933120727539, + "learning_rate": 1.815e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.5709, + "grad_norm": 1.3398513793945312, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.2716, + "grad_norm": 3.923830986022949, + "learning_rate": 1.813e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.509, + "grad_norm": 1.4502966403961182, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.4854, + "grad_norm": 1.4078965187072754, + "learning_rate": 1.811e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.2501, + "grad_norm": 3.077928304672241, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.5453, + "grad_norm": 1.7737340927124023, + "learning_rate": 1.809e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.239, + "grad_norm": 2.0369770526885986, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.2344, + "grad_norm": 1.9151840209960938, + "learning_rate": 1.807e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.5325, + "grad_norm": 1.6656997203826904, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.4971, + "grad_norm": 1.9251680374145508, + "learning_rate": 1.805e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.52, + "grad_norm": 1.8106904029846191, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.2154, + "grad_norm": 2.2629575729370117, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.4612, + "grad_norm": 1.7021019458770752, + "learning_rate": 1.802e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4315, + "grad_norm": 2.6399946212768555, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.4603, + "grad_norm": 1.909094214439392, + "learning_rate": 1.8e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.4483, + "grad_norm": 1.7435243129730225, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.4438, + "grad_norm": 2.1652462482452393, + "learning_rate": 1.798e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.4678, + "grad_norm": 2.338404417037964, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3195, + "grad_norm": 1.3209658861160278, + "learning_rate": 1.796e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.4409, + "grad_norm": 1.709653377532959, + "learning_rate": 1.795e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8982387185096741, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.4037, + "grad_norm": 2.7179744243621826, + "learning_rate": 1.794e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.2739, + "grad_norm": 1.0299943685531616, + "learning_rate": 1.793e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.2022, + "grad_norm": 2.607898473739624, + "learning_rate": 1.792e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.2042, + "grad_norm": 2.916175127029419, + "learning_rate": 1.791e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.3787, + "grad_norm": 2.026442527770996, + "learning_rate": 1.79e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.3879, + "grad_norm": 1.7650607824325562, + "learning_rate": 1.789e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.908023476600647, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.1951, + "grad_norm": 3.8692498207092285, + "learning_rate": 1.788e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.1904, + "grad_norm": 3.0922181606292725, + "learning_rate": 1.787e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.301, + "grad_norm": 1.9583574533462524, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.1827, + "grad_norm": 1.9792364835739136, + "learning_rate": 1.785e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.1794, + "grad_norm": 1.3933207988739014, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.3381, + "grad_norm": 1.6843299865722656, + "learning_rate": 1.783e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.1732, + "grad_norm": 1.4762918949127197, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.1689, + "grad_norm": 1.1075265407562256, + "learning_rate": 1.781e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.3562, + "grad_norm": 2.2154247760772705, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.1629, + "grad_norm": 1.3579362630844116, + "learning_rate": 1.779e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.3199, + "grad_norm": 1.9855793714523315, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.3381, + "grad_norm": 1.787819266319275, + "learning_rate": 1.777e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.1525, + "grad_norm": 1.0635879039764404, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.1496, + "grad_norm": 1.0544939041137695, + "learning_rate": 1.775e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.1459, + "grad_norm": 1.147072672843933, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.1426, + "grad_norm": 1.0801589488983154, + "learning_rate": 1.773e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.2557, + "grad_norm": 1.2963556051254272, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.1332, + "grad_norm": 1.3799799680709839, + "learning_rate": 1.771e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.2481, + "grad_norm": 1.1608214378356934, + "learning_rate": 1.77e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.2642, + "grad_norm": 1.2985522747039795, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.3124, + "grad_norm": 2.222142219543457, + "learning_rate": 1.768e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.3102, + "grad_norm": 2.533982753753662, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.1218, + "grad_norm": 1.7190382480621338, + "learning_rate": 1.766e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.1169, + "grad_norm": 1.3357374668121338, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.1147, + "grad_norm": 1.298270344734192, + "learning_rate": 1.764e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.3127, + "grad_norm": 2.2547061443328857, + "learning_rate": 1.763e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.2312, + "grad_norm": 1.7744327783584595, + "learning_rate": 1.762e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.3975, + "grad_norm": 4.527610778808594, + "learning_rate": 1.761e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.3551, + "grad_norm": 3.1718592643737793, + "learning_rate": 1.76e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.1045, + "grad_norm": 1.574190378189087, + "learning_rate": 1.759e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2236, + "grad_norm": 1.4468473196029663, + "learning_rate": 1.758e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.0999, + "grad_norm": 1.4842942953109741, + "learning_rate": 1.757e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.2509, + "grad_norm": 1.7860370874404907, + "learning_rate": 1.756e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.2611, + "grad_norm": 1.6783521175384521, + "learning_rate": 1.755e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.379, + "grad_norm": 2.3508005142211914, + "learning_rate": 1.754e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.0941, + "grad_norm": 2.0986952781677246, + "learning_rate": 1.753e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.0924, + "grad_norm": 1.9180539846420288, + "learning_rate": 1.752e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.0906, + "grad_norm": 1.0870189666748047, + "learning_rate": 1.751e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2357, + "grad_norm": 1.0672377347946167, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.2584, + "grad_norm": 2.204198122024536, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.0862, + "grad_norm": 2.385765552520752, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.2371, + "grad_norm": 1.8736376762390137, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.2442, + "grad_norm": 1.8243354558944702, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.0824, + "grad_norm": 1.8955978155136108, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3363, + "grad_norm": 2.798372507095337, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.0794, + "grad_norm": 1.304677128791809, + "learning_rate": 1.743e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.0773, + "grad_norm": 1.626665711402893, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.1939, + "grad_norm": 1.7440603971481323, + "learning_rate": 1.741e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.2501, + "grad_norm": 1.3810110092163086, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3304, + "grad_norm": 3.183516025543213, + "learning_rate": 1.739e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.2224, + "grad_norm": 2.094963550567627, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.2354, + "grad_norm": 1.3596550226211548, + "learning_rate": 1.737e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.0727, + "grad_norm": 1.5260241031646729, + "learning_rate": 1.736e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.222, + "grad_norm": 1.5992202758789062, + "learning_rate": 1.735e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3177, + "grad_norm": 2.2656893730163574, + "learning_rate": 1.734e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.0713, + "grad_norm": 1.7473493814468384, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2135, + "grad_norm": 1.9787451028823853, + "learning_rate": 1.732e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.1763, + "grad_norm": 1.0072226524353027, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.1957, + "grad_norm": 1.1664408445358276, + "learning_rate": 1.73e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3349, + "grad_norm": 2.7109858989715576, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.0711, + "grad_norm": 2.568545341491699, + "learning_rate": 1.728e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.1836, + "grad_norm": 1.850518822669983, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.0695, + "grad_norm": 2.5018086433410645, + "learning_rate": 1.726e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.1961, + "grad_norm": 0.9769375324249268, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.2135, + "grad_norm": 1.4824577569961548, + "learning_rate": 1.724e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.1623, + "grad_norm": 1.7970157861709595, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.2098, + "grad_norm": 1.702469825744629, + "learning_rate": 1.722e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.0642, + "grad_norm": 1.6492910385131836, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.1893, + "grad_norm": 1.3040688037872314, + "learning_rate": 1.72e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.0638, + "grad_norm": 2.035078287124634, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.0617, + "grad_norm": 1.428052306175232, + "learning_rate": 1.718e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.1591, + "grad_norm": 1.416749119758606, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.1787, + "grad_norm": 1.3673189878463745, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.324, + "grad_norm": 3.40804386138916, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.0582, + "grad_norm": 2.4875428676605225, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.1816, + "grad_norm": 1.6370735168457031, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.0556, + "grad_norm": 2.5525963306427, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.1861, + "grad_norm": 2.1719298362731934, + "learning_rate": 1.711e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.201, + "grad_norm": 1.304052472114563, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.1531, + "grad_norm": 1.5254027843475342, + "learning_rate": 1.709e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.2727, + "grad_norm": 2.922405242919922, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.1459, + "grad_norm": 1.7082411050796509, + "learning_rate": 1.707e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.174, + "grad_norm": 1.3555234670639038, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.1749, + "grad_norm": 0.9526453018188477, + "learning_rate": 1.705e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.1751, + "grad_norm": 1.491074800491333, + "learning_rate": 1.704e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3221, + "grad_norm": 3.0102553367614746, + "learning_rate": 1.703e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.1546, + "grad_norm": 2.2727670669555664, + "learning_rate": 1.702e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.1623, + "grad_norm": 1.1690260171890259, + "learning_rate": 1.701e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.1757, + "grad_norm": 1.3821128606796265, + "learning_rate": 1.7e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.1345, + "grad_norm": 1.1042118072509766, + "learning_rate": 1.699e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.1709, + "grad_norm": 1.283263087272644, + "learning_rate": 1.698e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.1741, + "grad_norm": 1.0933341979980469, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.1479, + "grad_norm": 1.3540836572647095, + "learning_rate": 1.696e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.094, + "grad_norm": 5.643751621246338, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.091, + "grad_norm": 5.622400760650635, + "learning_rate": 1.694e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.1534, + "grad_norm": 0.9459224343299866, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.0764, + "grad_norm": 4.563518047332764, + "learning_rate": 1.692e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.0689, + "grad_norm": 3.9746463298797607, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.1265, + "grad_norm": 1.5034980773925781, + "learning_rate": 1.69e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.055, + "grad_norm": 2.8813798427581787, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.0502, + "grad_norm": 2.0983633995056152, + "learning_rate": 1.688e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.1459, + "grad_norm": 2.4966609477996826, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.1373, + "grad_norm": 1.884824514389038, + "learning_rate": 1.686e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.12, + "grad_norm": 1.6215541362762451, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.0514, + "grad_norm": 3.570695400238037, + "learning_rate": 1.684e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.0503, + "grad_norm": 3.7310097217559814, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.1698, + "grad_norm": 1.3565757274627686, + "learning_rate": 1.682e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.144, + "grad_norm": 1.7988064289093018, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1553, + "grad_norm": 1.199349284172058, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2808, + "grad_norm": 2.2785050868988037, + "learning_rate": 1.679e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.1303, + "grad_norm": 1.4797053337097168, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.1437, + "grad_norm": 1.2159603834152222, + "learning_rate": 1.677e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.1094, + "grad_norm": 1.3378634452819824, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.1107, + "grad_norm": 1.3265125751495361, + "learning_rate": 1.675e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.104, + "grad_norm": 1.0398075580596924, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0508, + "grad_norm": 3.7928128242492676, + "learning_rate": 1.673e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.1141, + "grad_norm": 1.543946385383606, + "learning_rate": 1.672e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2347, + "grad_norm": 3.0478694438934326, + "learning_rate": 1.671e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1568, + "grad_norm": 1.438165307044983, + "learning_rate": 1.67e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0602, + "grad_norm": 4.521894454956055, + "learning_rate": 1.669e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0575, + "grad_norm": 4.285327434539795, + "learning_rate": 1.668e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.1228, + "grad_norm": 1.7977162599563599, + "learning_rate": 1.667e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0498, + "grad_norm": 3.2977139949798584, + "learning_rate": 1.666e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1072, + "grad_norm": 1.0961717367172241, + "learning_rate": 1.665e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.0888, + "grad_norm": 1.2719725370407104, + "learning_rate": 1.664e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.1016, + "grad_norm": 1.7138031721115112, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.0775, + "grad_norm": 1.2170872688293457, + "learning_rate": 1.662e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0415, + "grad_norm": 2.3039064407348633, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0407, + "grad_norm": 2.1441495418548584, + "learning_rate": 1.66e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 1.0, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0378, + "grad_norm": 1.570320725440979, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0358, + "grad_norm": 1.359679937362671, + "learning_rate": 1.658e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1491, + "grad_norm": 1.4656238555908203, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.093, + "grad_norm": 1.550439715385437, + "learning_rate": 1.656e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.1191, + "grad_norm": 1.6594032049179077, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1667, + "grad_norm": 1.6316683292388916, + "learning_rate": 1.654e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.1172, + "grad_norm": 1.1592111587524414, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0288, + "grad_norm": 1.2376233339309692, + "learning_rate": 1.652e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 1.0, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0279, + "grad_norm": 1.1726553440093994, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.158, + "grad_norm": 1.639247179031372, + "learning_rate": 1.65e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0254, + "grad_norm": 0.882344126701355, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0236, + "grad_norm": 0.7603262066841125, + "learning_rate": 1.648e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0231, + "grad_norm": 1.0259835720062256, + "learning_rate": 1.647e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1341, + "grad_norm": 1.3803941011428833, + "learning_rate": 1.646e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.26, + "grad_norm": 2.67657208442688, + "learning_rate": 1.645e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.0787, + "grad_norm": 1.1956502199172974, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0184, + "grad_norm": 1.0563417673110962, + "learning_rate": 1.643e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.2769, + "grad_norm": 3.5824198722839355, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.017, + "grad_norm": 0.9444816708564758, + "learning_rate": 1.641e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1499, + "grad_norm": 1.6610344648361206, + "learning_rate": 1.64e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0159, + "grad_norm": 1.3713178634643555, + "learning_rate": 1.639e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0142, + "grad_norm": 0.7958543300628662, + "learning_rate": 1.638e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0136, + "grad_norm": 0.7060168385505676, + "learning_rate": 1.637e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0126, + "grad_norm": 0.6885517239570618, + "learning_rate": 1.636e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.1437, + "grad_norm": 1.7837411165237427, + "learning_rate": 1.635e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.1352, + "grad_norm": 1.0794353485107422, + "learning_rate": 1.634e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1036, + "grad_norm": 1.2649973630905151, + "learning_rate": 1.633e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.082, + "grad_norm": 1.4123811721801758, + "learning_rate": 1.632e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2251, + "grad_norm": 2.3190250396728516, + "learning_rate": 1.631e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0101, + "grad_norm": 1.145607590675354, + "learning_rate": 1.63e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.01, + "grad_norm": 1.1430310010910034, + "learning_rate": 1.629e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1157, + "grad_norm": 1.080237865447998, + "learning_rate": 1.628e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0094, + "grad_norm": 0.8564168810844421, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.009, + "grad_norm": 0.6895986199378967, + "learning_rate": 1.626e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0088, + "grad_norm": 0.7237755656242371, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0081, + "grad_norm": 0.7111520767211914, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.2266, + "grad_norm": 3.2268872261047363, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.1096, + "grad_norm": 1.5681886672973633, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1323, + "grad_norm": 1.1309343576431274, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0065, + "grad_norm": 0.4017643630504608, + "learning_rate": 1.62e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.0901, + "grad_norm": 1.3869181871414185, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.135, + "grad_norm": 1.0720597505569458, + "learning_rate": 1.618e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2196, + "grad_norm": 2.46571683883667, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.1479, + "grad_norm": 1.4283263683319092, + "learning_rate": 1.616e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1442, + "grad_norm": 1.0318039655685425, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.119, + "grad_norm": 0.9293051958084106, + "learning_rate": 1.614e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0122, + "grad_norm": 2.9073522090911865, + "learning_rate": 1.613e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0156, + "grad_norm": 3.24949049949646, + "learning_rate": 1.612e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2428, + "grad_norm": 2.2780046463012695, + "learning_rate": 1.611e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0158, + "grad_norm": 2.8313698768615723, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.073, + "grad_norm": 1.1441925764083862, + "learning_rate": 1.609e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.0713, + "grad_norm": 1.0356674194335938, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1163, + "grad_norm": 0.9958234429359436, + "learning_rate": 1.607e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.065, + "grad_norm": 1.0690953731536865, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0143, + "grad_norm": 2.4794986248016357, + "learning_rate": 1.605e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1213, + "grad_norm": 1.1662561893463135, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0133, + "grad_norm": 2.1572377681732178, + "learning_rate": 1.603e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2415, + "grad_norm": 2.1097450256347656, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.2415, + "grad_norm": 1.9146851301193237, + "learning_rate": 1.601e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.0792, + "grad_norm": 1.4688655138015747, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1037, + "grad_norm": 1.3678481578826904, + "learning_rate": 1.599e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.0645, + "grad_norm": 1.394155740737915, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1221, + "grad_norm": 1.3450697660446167, + "learning_rate": 1.597e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0111, + "grad_norm": 1.5307925939559937, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.0111, + "grad_norm": 1.5876197814941406, + "learning_rate": 1.595e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.1193, + "grad_norm": 1.4841184616088867, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.1328, + "grad_norm": 1.1095598936080933, + "learning_rate": 1.593e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0093, + "grad_norm": 1.4608124494552612, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1107, + "grad_norm": 1.4897429943084717, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.1984, + "grad_norm": 2.675309419631958, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0076, + "grad_norm": 1.1623023748397827, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0063, + "grad_norm": 0.732515275478363, + "learning_rate": 1.588e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1286, + "grad_norm": 1.144338846206665, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.1896, + "grad_norm": 2.561152219772339, + "learning_rate": 1.586e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.1736, + "grad_norm": 2.7632133960723877, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0056, + "grad_norm": 0.5383828282356262, + "learning_rate": 1.584e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0053, + "grad_norm": 0.5213011503219604, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.1293, + "grad_norm": 1.3833296298980713, + "learning_rate": 1.582e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0047, + "grad_norm": 0.35407668352127075, + "learning_rate": 1.581e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1152, + "grad_norm": 1.2960784435272217, + "learning_rate": 1.58e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.0701, + "grad_norm": 1.1170578002929688, + "learning_rate": 1.579e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1111, + "grad_norm": 1.0579668283462524, + "learning_rate": 1.578e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0048, + "grad_norm": 0.4491373300552368, + "learning_rate": 1.577e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0052, + "grad_norm": 0.5798842906951904, + "learning_rate": 1.576e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0053, + "grad_norm": 0.6644476056098938, + "learning_rate": 1.575e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1002, + "grad_norm": 1.4146150350570679, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0049, + "grad_norm": 0.5174235701560974, + "learning_rate": 1.573e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1005, + "grad_norm": 1.295534610748291, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.0997, + "grad_norm": 1.874627947807312, + "learning_rate": 1.571e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0048, + "grad_norm": 0.477443128824234, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0048, + "grad_norm": 0.5091577172279358, + "learning_rate": 1.569e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0045, + "grad_norm": 0.42573752999305725, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1289, + "grad_norm": 1.2042423486709595, + "learning_rate": 1.567e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.0741, + "grad_norm": 1.1629348993301392, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.004, + "grad_norm": 0.3303038775920868, + "learning_rate": 1.565e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0039, + "grad_norm": 0.279052734375, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1122, + "grad_norm": 1.5259605646133423, + "learning_rate": 1.563e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1174, + "grad_norm": 1.2986260652542114, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.0041, + "grad_norm": 0.4193200170993805, + "learning_rate": 1.561e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1207, + "grad_norm": 1.2413984537124634, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0045, + "grad_norm": 0.6368035078048706, + "learning_rate": 1.559e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.101, + "grad_norm": 1.2425626516342163, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1124, + "grad_norm": 1.019707202911377, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0051, + "grad_norm": 0.8345929384231567, + "learning_rate": 1.556e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0052, + "grad_norm": 0.8587450385093689, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1214, + "grad_norm": 1.1086853742599487, + "learning_rate": 1.554e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1164, + "grad_norm": 1.238479495048523, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1249, + "grad_norm": 1.3684537410736084, + "learning_rate": 1.552e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0054, + "grad_norm": 0.947119951248169, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0056, + "grad_norm": 0.9146615266799927, + "learning_rate": 1.55e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.0782, + "grad_norm": 1.2344416379928589, + "learning_rate": 1.549e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.4506, + "grad_norm": 7.777007579803467, + "learning_rate": 1.548e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.0639, + "grad_norm": 1.501968264579773, + "learning_rate": 1.547e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0046, + "grad_norm": 0.6376725435256958, + "learning_rate": 1.546e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0043, + "grad_norm": 0.5955199003219604, + "learning_rate": 1.545e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1027, + "grad_norm": 1.514914631843567, + "learning_rate": 1.544e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1145, + "grad_norm": 1.1080951690673828, + "learning_rate": 1.543e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.1661, + "grad_norm": 2.103287696838379, + "learning_rate": 1.542e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0041, + "grad_norm": 0.5920866131782532, + "learning_rate": 1.541e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.0831, + "grad_norm": 1.2727563381195068, + "learning_rate": 1.54e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.076, + "grad_norm": 1.3624043464660645, + "learning_rate": 1.539e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0051, + "grad_norm": 1.0213030576705933, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0053, + "grad_norm": 1.1751487255096436, + "learning_rate": 1.537e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1073, + "grad_norm": 1.1450884342193604, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1152, + "grad_norm": 1.0188744068145752, + "learning_rate": 1.535e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0042, + "grad_norm": 0.6943671703338623, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.0041, + "grad_norm": 0.5702145099639893, + "learning_rate": 1.533e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1601, + "grad_norm": 2.467028856277466, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0036, + "grad_norm": 0.3947738707065582, + "learning_rate": 1.531e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0035, + "grad_norm": 0.3578404486179352, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1018, + "grad_norm": 1.5206029415130615, + "learning_rate": 1.529e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.0753, + "grad_norm": 1.400350570678711, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0032, + "grad_norm": 0.33458250761032104, + "learning_rate": 1.527e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0029, + "grad_norm": 0.2822412848472595, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0029, + "grad_norm": 0.24599352478981018, + "learning_rate": 1.525e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.0772, + "grad_norm": 1.2155442237854004, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0028, + "grad_norm": 0.2298114001750946, + "learning_rate": 1.523e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0027, + "grad_norm": 0.23676389455795288, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.0027, + "grad_norm": 0.21022361516952515, + "learning_rate": 1.521e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1104, + "grad_norm": 1.7568659782409668, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0027, + "grad_norm": 0.28411486744880676, + "learning_rate": 1.519e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0028, + "grad_norm": 0.2967180907726288, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0026, + "grad_norm": 0.31251031160354614, + "learning_rate": 1.517e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.0629, + "grad_norm": 1.4641610383987427, + "learning_rate": 1.516e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.0024, + "grad_norm": 0.22654157876968384, + "learning_rate": 1.515e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.063, + "grad_norm": 1.187050223350525, + "learning_rate": 1.514e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.0565, + "grad_norm": 1.331944227218628, + "learning_rate": 1.513e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0026, + "grad_norm": 0.37733522057533264, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.0989, + "grad_norm": 1.4206980466842651, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0028, + "grad_norm": 0.3664330244064331, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.003, + "grad_norm": 0.5825914740562439, + "learning_rate": 1.509e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.47541120648384094, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1152, + "grad_norm": 1.194077730178833, + "learning_rate": 1.507e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.0642, + "grad_norm": 1.5998581647872925, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0031, + "grad_norm": 0.45395979285240173, + "learning_rate": 1.505e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.066, + "grad_norm": 1.4924191236495972, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0642, + "grad_norm": 1.4406323432922363, + "learning_rate": 1.503e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.004, + "grad_norm": 0.7274853587150574, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0637, + "grad_norm": 1.4921272993087769, + "learning_rate": 1.501e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0881, + "grad_norm": 1.3289899826049805, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0046, + "grad_norm": 0.9299827814102173, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.0917, + "grad_norm": 1.0895007848739624, + "learning_rate": 1.498e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0055, + "grad_norm": 1.2428455352783203, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.0904, + "grad_norm": 1.1731876134872437, + "learning_rate": 1.496e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0042, + "grad_norm": 0.8642317652702332, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.0042, + "grad_norm": 0.9150028228759766, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1244, + "grad_norm": 1.520849585533142, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0667, + "grad_norm": 1.3897782564163208, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.4630263149738312, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0026, + "grad_norm": 0.32279714941978455, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1723, + "grad_norm": 2.5587806701660156, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.084, + "grad_norm": 1.5307081937789917, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0455, + "grad_norm": 1.2075250148773193, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.3137587904930115, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1133, + "grad_norm": 1.3542101383209229, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0025, + "grad_norm": 0.3963753581047058, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1022, + "grad_norm": 1.4186869859695435, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0029, + "grad_norm": 0.533608615398407, + "learning_rate": 1.482e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0842, + "grad_norm": 1.5056371688842773, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0033, + "grad_norm": 0.6577285528182983, + "learning_rate": 1.48e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1089, + "grad_norm": 1.4338765144348145, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1055, + "grad_norm": 1.13351571559906, + "learning_rate": 1.478e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.0951, + "grad_norm": 1.237243413925171, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.212, + "grad_norm": 3.4371607303619385, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.0058, + "grad_norm": 1.4969244003295898, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0068, + "grad_norm": 1.7211462259292603, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.0986, + "grad_norm": 0.948099672794342, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0057, + "grad_norm": 1.391058325767517, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0042, + "grad_norm": 0.9918210506439209, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.2042, + "grad_norm": 2.672642230987549, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.003, + "grad_norm": 0.45506858825683594, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0797, + "grad_norm": 1.4114668369293213, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0027, + "grad_norm": 0.5301483869552612, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0668, + "grad_norm": 1.3311203718185425, + "learning_rate": 1.466e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.0022, + "grad_norm": 0.2691483795642853, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.1992, + "grad_norm": 1.9987740516662598, + "learning_rate": 1.464e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1435, + "grad_norm": 2.9904839992523193, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.4652901887893677, + "learning_rate": 1.462e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0022, + "grad_norm": 0.30126360058784485, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0023, + "grad_norm": 0.28965601325035095, + "learning_rate": 1.46e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0022, + "grad_norm": 0.23019753396511078, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0022, + "grad_norm": 0.21258652210235596, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0748, + "grad_norm": 1.3212836980819702, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.002, + "grad_norm": 0.15865401923656464, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.002, + "grad_norm": 0.18746234476566315, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0684, + "grad_norm": 1.4932857751846313, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0021, + "grad_norm": 0.23370607197284698, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0765, + "grad_norm": 1.3977128267288208, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.0999, + "grad_norm": 1.421388030052185, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.41459253430366516, + "learning_rate": 1.45e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0026, + "grad_norm": 0.4490201473236084, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0918, + "grad_norm": 1.3046605587005615, + "learning_rate": 1.448e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0864, + "grad_norm": 1.233083963394165, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0032, + "grad_norm": 0.6014226078987122, + "learning_rate": 1.446e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1619, + "grad_norm": 2.670433759689331, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0034, + "grad_norm": 0.6123008131980896, + "learning_rate": 1.444e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1146, + "grad_norm": 1.6403765678405762, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1593, + "grad_norm": 2.7106077671051025, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0035, + "grad_norm": 0.693053126335144, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.06, + "grad_norm": 4.2686448097229, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.0764, + "grad_norm": 1.4215189218521118, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0037, + "grad_norm": 0.7100173234939575, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1991, + "grad_norm": 2.5193188190460205, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.0711, + "grad_norm": 1.3730517625808716, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.0891, + "grad_norm": 1.397972583770752, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0928, + "grad_norm": 1.5409183502197266, + "learning_rate": 1.434e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.0893, + "grad_norm": 1.1101114749908447, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0055, + "grad_norm": 1.2417343854904175, + "learning_rate": 1.432e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0829, + "grad_norm": 1.277969479560852, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.0892, + "grad_norm": 1.385054349899292, + "learning_rate": 1.43e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.0074, + "grad_norm": 1.8123408555984497, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0575, + "grad_norm": 1.3045315742492676, + "learning_rate": 1.428e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.1662, + "grad_norm": 2.5381715297698975, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.0067, + "grad_norm": 1.5872633457183838, + "learning_rate": 1.426e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0061, + "grad_norm": 1.5367522239685059, + "learning_rate": 1.425e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0052, + "grad_norm": 1.1771265268325806, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0035, + "grad_norm": 0.596717119216919, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0027, + "grad_norm": 0.3555561900138855, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0022, + "grad_norm": 0.31791797280311584, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.1456, + "grad_norm": 3.0790412425994873, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.0915, + "grad_norm": 1.610164761543274, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.0019, + "grad_norm": 0.35682275891304016, + "learning_rate": 1.418e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0758, + "grad_norm": 1.1877442598342896, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.0018, + "grad_norm": 0.3156123459339142, + "learning_rate": 1.416e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0017, + "grad_norm": 0.25764769315719604, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1041, + "grad_norm": 1.8042068481445312, + "learning_rate": 1.414e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1758, + "grad_norm": 2.5269131660461426, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0016, + "grad_norm": 0.12714117765426636, + "learning_rate": 1.412e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0016, + "grad_norm": 0.13591638207435608, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.0943, + "grad_norm": 1.4506866931915283, + "learning_rate": 1.41e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0017, + "grad_norm": 0.17016956210136414, + "learning_rate": 1.409e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0715, + "grad_norm": 1.1805306673049927, + "learning_rate": 1.408e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0831, + "grad_norm": 1.2475357055664062, + "learning_rate": 1.407e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.35699722170829773, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0721, + "grad_norm": 1.1971431970596313, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.066, + "grad_norm": 1.1251575946807861, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0027, + "grad_norm": 0.5506196618080139, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1048, + "grad_norm": 1.8220717906951904, + "learning_rate": 1.402e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.0037, + "grad_norm": 0.8545289039611816, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0037, + "grad_norm": 0.8475953936576843, + "learning_rate": 1.4e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.0967, + "grad_norm": 1.2703156471252441, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.098, + "grad_norm": 1.2548829317092896, + "learning_rate": 1.398e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.0924, + "grad_norm": 1.2570987939834595, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0609, + "grad_norm": 1.531058669090271, + "learning_rate": 1.396e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1424, + "grad_norm": 2.5060534477233887, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0048, + "grad_norm": 1.0655303001403809, + "learning_rate": 1.394e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0593, + "grad_norm": 1.0243408679962158, + "learning_rate": 1.393e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0905, + "grad_norm": 1.3182287216186523, + "learning_rate": 1.392e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0068, + "grad_norm": 1.4663218259811401, + "learning_rate": 1.391e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0057, + "grad_norm": 1.2375314235687256, + "learning_rate": 1.39e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0489, + "grad_norm": 1.071290135383606, + "learning_rate": 1.389e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0743, + "grad_norm": 1.0402666330337524, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.1041, + "grad_norm": 2.195901870727539, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0038, + "grad_norm": 0.7095027565956116, + "learning_rate": 1.386e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0804, + "grad_norm": 1.4653010368347168, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0038, + "grad_norm": 0.7164344191551208, + "learning_rate": 1.384e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.1019, + "grad_norm": 1.508054494857788, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0031, + "grad_norm": 0.4974660575389862, + "learning_rate": 1.382e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0031, + "grad_norm": 0.4921479821205139, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0614, + "grad_norm": 1.180677056312561, + "learning_rate": 1.38e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0843, + "grad_norm": 1.1165193319320679, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0816, + "grad_norm": 1.4082179069519043, + "learning_rate": 1.378e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.0893, + "grad_norm": 1.1407965421676636, + "learning_rate": 1.377e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0029, + "grad_norm": 0.47326186299324036, + "learning_rate": 1.376e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.48467254638671875, + "learning_rate": 1.375e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3466941714286804, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0028, + "grad_norm": 0.383543461561203, + "learning_rate": 1.373e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0027, + "grad_norm": 0.3878021240234375, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0699, + "grad_norm": 1.2407838106155396, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0956, + "grad_norm": 1.2576494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0022, + "grad_norm": 0.25685280561447144, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0022, + "grad_norm": 0.2545858323574066, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0023, + "grad_norm": 0.2819485366344452, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0858, + "grad_norm": 1.0897297859191895, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0021, + "grad_norm": 0.325777530670166, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0021, + "grad_norm": 0.29383793473243713, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0944, + "grad_norm": 1.389978289604187, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0962, + "grad_norm": 1.3364863395690918, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0019, + "grad_norm": 0.23381884396076202, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.058, + "grad_norm": 1.5767658948898315, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.002, + "grad_norm": 0.288552463054657, + "learning_rate": 1.359e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0894, + "grad_norm": 1.6633201837539673, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0829, + "grad_norm": 1.4220677614212036, + "learning_rate": 1.357e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0845, + "grad_norm": 1.3433754444122314, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.0917, + "grad_norm": 1.295201063156128, + "learning_rate": 1.355e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.0891, + "grad_norm": 1.3927174806594849, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.006, + "grad_norm": 1.4622353315353394, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0481, + "grad_norm": 1.178935170173645, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0075, + "grad_norm": 1.825118064880371, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.0065, + "grad_norm": 1.5563267469406128, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0059, + "grad_norm": 1.4133291244506836, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0753, + "grad_norm": 1.4185911417007446, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.087, + "grad_norm": 1.3738617897033691, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0702, + "grad_norm": 1.0876400470733643, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0031, + "grad_norm": 0.587776243686676, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.057, + "grad_norm": 1.4529519081115723, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0596, + "grad_norm": 1.0564322471618652, + "learning_rate": 1.343e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0795, + "grad_norm": 1.359084129333496, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0602, + "grad_norm": 1.625110387802124, + "learning_rate": 1.341e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.1519, + "grad_norm": 2.79744291305542, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1522, + "grad_norm": 2.5003347396850586, + "learning_rate": 1.339e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0591, + "grad_norm": 1.2735769748687744, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0603, + "grad_norm": 1.4963431358337402, + "learning_rate": 1.337e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.008, + "grad_norm": 1.6320358514785767, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0885, + "grad_norm": 1.660543441772461, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.059, + "grad_norm": 1.6638036966323853, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0092, + "grad_norm": 1.7701940536499023, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0717, + "grad_norm": 1.6387797594070435, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0795, + "grad_norm": 1.6651279926300049, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0811, + "grad_norm": 1.6673662662506104, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.1082, + "grad_norm": 2.1547534465789795, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0724, + "grad_norm": 1.5310810804367065, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1319, + "grad_norm": 3.544659376144409, + "learning_rate": 1.327e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0668, + "grad_norm": 1.4902386665344238, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0099, + "grad_norm": 1.8921332359313965, + "learning_rate": 1.325e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0093, + "grad_norm": 1.8240478038787842, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0727, + "grad_norm": 1.3348301649093628, + "learning_rate": 1.323e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.082, + "grad_norm": 1.235790491104126, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0743, + "grad_norm": 1.6094404458999634, + "learning_rate": 1.321e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0079, + "grad_norm": 1.5763838291168213, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0882, + "grad_norm": 1.602766513824463, + "learning_rate": 1.319e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0654, + "grad_norm": 1.5263670682907104, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0678, + "grad_norm": 1.2824158668518066, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1246, + "grad_norm": 2.722593307495117, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0428, + "grad_norm": 1.1944324970245361, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0643, + "grad_norm": 1.0645701885223389, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.0061, + "grad_norm": 1.2870023250579834, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0055, + "grad_norm": 1.1952035427093506, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0621, + "grad_norm": 1.063179850578308, + "learning_rate": 1.311e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0047, + "grad_norm": 0.9894086122512817, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0763, + "grad_norm": 1.4259341955184937, + "learning_rate": 1.309e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0642, + "grad_norm": 1.2943477630615234, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.079, + "grad_norm": 1.5152034759521484, + "learning_rate": 1.307e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0717, + "grad_norm": 1.1957803964614868, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.0599, + "grad_norm": 1.4417110681533813, + "learning_rate": 1.305e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0654, + "grad_norm": 1.5242059230804443, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0365, + "grad_norm": 1.1553280353546143, + "learning_rate": 1.303e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0045, + "grad_norm": 0.8679006695747375, + "learning_rate": 1.302e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0782, + "grad_norm": 1.3552151918411255, + "learning_rate": 1.301e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0777, + "grad_norm": 1.6802747249603271, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0895, + "grad_norm": 2.0004899501800537, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0065, + "grad_norm": 1.2331161499023438, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0926, + "grad_norm": 1.814571738243103, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0447, + "grad_norm": 1.2055951356887817, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.1061, + "grad_norm": 1.93771493434906, + "learning_rate": 1.295e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0071, + "grad_norm": 1.3096961975097656, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0809, + "grad_norm": 1.462066650390625, + "learning_rate": 1.293e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0696, + "grad_norm": 1.6013977527618408, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.0067, + "grad_norm": 1.247151494026184, + "learning_rate": 1.291e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0822, + "grad_norm": 1.3341907262802124, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1516, + "grad_norm": 2.655081033706665, + "learning_rate": 1.289e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0628, + "grad_norm": 1.1444809436798096, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.0731, + "grad_norm": 1.465855598449707, + "learning_rate": 1.287e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0057, + "grad_norm": 1.112541913986206, + "learning_rate": 1.286e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.1399, + "grad_norm": 3.088876485824585, + "learning_rate": 1.285e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0759, + "grad_norm": 1.2233434915542603, + "learning_rate": 1.284e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0557, + "grad_norm": 1.2852802276611328, + "learning_rate": 1.283e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.005, + "grad_norm": 1.0076061487197876, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0549, + "grad_norm": 1.230972409248352, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.004, + "grad_norm": 0.7870916724205017, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0034, + "grad_norm": 0.6174665093421936, + "learning_rate": 1.279e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.5346, + "grad_norm": 9.506900787353516, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0627, + "grad_norm": 1.454014539718628, + "learning_rate": 1.277e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3459113836288452, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0775, + "grad_norm": 1.3046914339065552, + "learning_rate": 1.275e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0528, + "grad_norm": 1.3675225973129272, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0629, + "grad_norm": 1.5410852432250977, + "learning_rate": 1.273e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0579, + "grad_norm": 1.2241291999816895, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0023, + "grad_norm": 0.32806485891342163, + "learning_rate": 1.271e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0024, + "grad_norm": 0.3713594675064087, + "learning_rate": 1.27e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0024, + "grad_norm": 0.383628249168396, + "learning_rate": 1.269e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0565, + "grad_norm": 1.4605262279510498, + "learning_rate": 1.268e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0907, + "grad_norm": 2.0260767936706543, + "learning_rate": 1.267e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1355, + "grad_norm": 2.7483110427856445, + "learning_rate": 1.266e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0028, + "grad_norm": 0.5287377834320068, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5259289145469666, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0029, + "grad_norm": 0.5197233557701111, + "learning_rate": 1.263e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0779, + "grad_norm": 1.9638550281524658, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0022, + "grad_norm": 0.34271013736724854, + "learning_rate": 1.261e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.31841135025024414, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.0021, + "grad_norm": 0.28541284799575806, + "learning_rate": 1.259e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.0765, + "grad_norm": 1.1577314138412476, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0018, + "grad_norm": 0.2100057303905487, + "learning_rate": 1.257e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.19263769686222076, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.0813, + "grad_norm": 1.540268898010254, + "learning_rate": 1.255e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0705, + "grad_norm": 1.2791322469711304, + "learning_rate": 1.254e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.3907, + "grad_norm": 7.0182013511657715, + "learning_rate": 1.253e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.19119806587696075, + "learning_rate": 1.252e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.18740034103393555, + "learning_rate": 1.251e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0797, + "grad_norm": 1.8779743909835815, + "learning_rate": 1.25e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0018, + "grad_norm": 0.1861187219619751, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.17008422315120697, + "learning_rate": 1.248e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0018, + "grad_norm": 0.2042454481124878, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.083, + "grad_norm": 1.2712551355361938, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0019, + "grad_norm": 0.22894388437271118, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0632, + "grad_norm": 1.2945611476898193, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0018, + "grad_norm": 0.21884307265281677, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0018, + "grad_norm": 0.22480158507823944, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0019, + "grad_norm": 0.24674543738365173, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0795, + "grad_norm": 2.106468677520752, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0018, + "grad_norm": 0.2204350233078003, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0737, + "grad_norm": 1.4242573976516724, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0878, + "grad_norm": 1.518812656402588, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0633, + "grad_norm": 1.0321228504180908, + "learning_rate": 1.236e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0756, + "grad_norm": 1.1949939727783203, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0024, + "grad_norm": 0.4306935966014862, + "learning_rate": 1.234e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0627, + "grad_norm": 1.1531753540039062, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.003, + "grad_norm": 0.6374348998069763, + "learning_rate": 1.232e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0036, + "grad_norm": 0.7683020234107971, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1434, + "grad_norm": 2.3946049213409424, + "learning_rate": 1.23e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.0032, + "grad_norm": 0.6773089170455933, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.003, + "grad_norm": 0.5508646368980408, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0517, + "grad_norm": 1.0663422346115112, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0598, + "grad_norm": 1.1945189237594604, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0024, + "grad_norm": 0.3890499174594879, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0023, + "grad_norm": 0.3637482821941376, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0022, + "grad_norm": 0.3558770716190338, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.0698, + "grad_norm": 1.282705545425415, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0753, + "grad_norm": 1.923362374305725, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.0769, + "grad_norm": 1.28227961063385, + "learning_rate": 1.22e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0019, + "grad_norm": 0.26410141587257385, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0802, + "grad_norm": 1.2387802600860596, + "learning_rate": 1.218e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.002, + "grad_norm": 0.3023037612438202, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0547, + "grad_norm": 1.3596991300582886, + "learning_rate": 1.216e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0725, + "grad_norm": 1.2279936075210571, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0603, + "grad_norm": 1.4540890455245972, + "learning_rate": 1.214e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0026, + "grad_norm": 0.48957788944244385, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0771, + "grad_norm": 1.2322392463684082, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0434, + "grad_norm": 1.224611759185791, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.7317530512809753, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0038, + "grad_norm": 0.7885755300521851, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0692, + "grad_norm": 1.2012921571731567, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.0036, + "grad_norm": 0.8018218874931335, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0451, + "grad_norm": 1.2235223054885864, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0474, + "grad_norm": 1.2205861806869507, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.0032, + "grad_norm": 0.7037767767906189, + "learning_rate": 1.204e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0518, + "grad_norm": 1.4091877937316895, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0691, + "grad_norm": 1.106124758720398, + "learning_rate": 1.202e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0034, + "grad_norm": 0.7851144075393677, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0032, + "grad_norm": 0.7951046824455261, + "learning_rate": 1.2e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0831, + "grad_norm": 1.5029832124710083, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0026, + "grad_norm": 0.5559270977973938, + "learning_rate": 1.198e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0022, + "grad_norm": 0.4153921902179718, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.0021, + "grad_norm": 0.37202781438827515, + "learning_rate": 1.196e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0529, + "grad_norm": 1.0388691425323486, + "learning_rate": 1.195e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0017, + "grad_norm": 0.22652830183506012, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0645, + "grad_norm": 1.505333423614502, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0865, + "grad_norm": 1.883539080619812, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0015, + "grad_norm": 0.16957923769950867, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0015, + "grad_norm": 0.19717897474765778, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0014, + "grad_norm": 0.1534471958875656, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0494, + "grad_norm": 1.1535961627960205, + "learning_rate": 1.188e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0014, + "grad_norm": 0.1624767929315567, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0015, + "grad_norm": 0.17362011969089508, + "learning_rate": 1.186e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0775, + "grad_norm": 1.9903476238250732, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1399, + "grad_norm": 3.302823781967163, + "learning_rate": 1.184e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0673, + "grad_norm": 1.326196312904358, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0015, + "grad_norm": 0.18564815819263458, + "learning_rate": 1.182e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0548, + "grad_norm": 1.438742756843567, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0017, + "grad_norm": 0.23712487518787384, + "learning_rate": 1.18e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0018, + "grad_norm": 0.27533257007598877, + "learning_rate": 1.179e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0018, + "grad_norm": 0.2764306366443634, + "learning_rate": 1.178e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0513, + "grad_norm": 1.2485377788543701, + "learning_rate": 1.177e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.143, + "grad_norm": 2.3260533809661865, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0865, + "grad_norm": 2.006594181060791, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0728, + "grad_norm": 1.229394793510437, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0727, + "grad_norm": 1.264754295349121, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0624, + "grad_norm": 1.1297813653945923, + "learning_rate": 1.172e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0657, + "grad_norm": 1.348644495010376, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.4017, + "grad_norm": 7.936118125915527, + "learning_rate": 1.17e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0509, + "grad_norm": 2.504011392593384, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0071, + "grad_norm": 1.4856328964233398, + "learning_rate": 1.168e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0065, + "grad_norm": 1.3074718713760376, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.0064, + "grad_norm": 1.328763484954834, + "learning_rate": 1.166e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 1.255282998085022, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1362, + "grad_norm": 1.9963600635528564, + "learning_rate": 1.164e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0042, + "grad_norm": 0.8505628108978271, + "learning_rate": 1.163e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0554, + "grad_norm": 1.5559666156768799, + "learning_rate": 1.162e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0029, + "grad_norm": 0.528516411781311, + "learning_rate": 1.161e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0025, + "grad_norm": 0.40555793046951294, + "learning_rate": 1.16e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0021, + "grad_norm": 0.3407900333404541, + "learning_rate": 1.159e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0726, + "grad_norm": 1.2919087409973145, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.4289, + "grad_norm": 6.98607063293457, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0511, + "grad_norm": 1.4350818395614624, + "learning_rate": 1.156e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0519, + "grad_norm": 1.400582194328308, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0017, + "grad_norm": 0.31648895144462585, + "learning_rate": 1.154e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0018, + "grad_norm": 0.3369519114494324, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0572, + "grad_norm": 1.1995043754577637, + "learning_rate": 1.152e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0742, + "grad_norm": 0.9991039633750916, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0501, + "grad_norm": 1.4309474229812622, + "learning_rate": 1.15e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.1276, + "grad_norm": 2.5142507553100586, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0023, + "grad_norm": 0.4930354058742523, + "learning_rate": 1.148e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.09, + "grad_norm": 1.8823350667953491, + "learning_rate": 1.147e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0517, + "grad_norm": 1.3514404296875, + "learning_rate": 1.146e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0023, + "grad_norm": 0.39818212389945984, + "learning_rate": 1.145e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.0026, + "grad_norm": 0.4840705394744873, + "learning_rate": 1.144e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0551, + "grad_norm": 0.9981673955917358, + "learning_rate": 1.143e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0025, + "grad_norm": 0.43263715505599976, + "learning_rate": 1.142e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1179, + "grad_norm": 2.982013463973999, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0691, + "grad_norm": 0.9637575745582581, + "learning_rate": 1.14e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0764, + "grad_norm": 1.1376231908798218, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0474, + "grad_norm": 0.9938456416130066, + "learning_rate": 1.138e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0036, + "grad_norm": 0.6827121376991272, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.069, + "grad_norm": 1.1721850633621216, + "learning_rate": 1.136e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0742, + "grad_norm": 1.3182216882705688, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0619, + "grad_norm": 1.405136227607727, + "learning_rate": 1.134e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0053, + "grad_norm": 1.0143218040466309, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0822, + "grad_norm": 1.4492801427841187, + "learning_rate": 1.132e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0518, + "grad_norm": 1.1326556205749512, + "learning_rate": 1.131e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0059, + "grad_norm": 1.0942848920822144, + "learning_rate": 1.13e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0486, + "grad_norm": 1.2563117742538452, + "learning_rate": 1.129e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.0994, + "grad_norm": 2.3433609008789062, + "learning_rate": 1.128e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1001, + "grad_norm": 2.7536284923553467, + "learning_rate": 1.127e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.0585, + "grad_norm": 0.9778537154197693, + "learning_rate": 1.126e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0062, + "grad_norm": 1.1226321458816528, + "learning_rate": 1.125e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0471, + "grad_norm": 1.1883548498153687, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0784, + "grad_norm": 1.976486086845398, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0701, + "grad_norm": 1.0843766927719116, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.067, + "grad_norm": 1.3081246614456177, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0062, + "grad_norm": 1.1432628631591797, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.0415, + "grad_norm": 0.9637823104858398, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0059, + "grad_norm": 1.120526909828186, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.005, + "grad_norm": 0.9103840589523315, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0696, + "grad_norm": 1.4037501811981201, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0466, + "grad_norm": 0.9911297559738159, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.0383, + "grad_norm": 0.9758827090263367, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0675, + "grad_norm": 1.3758506774902344, + "learning_rate": 1.113e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0032, + "grad_norm": 0.5923029780387878, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0032, + "grad_norm": 0.5734418630599976, + "learning_rate": 1.111e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0533, + "grad_norm": 1.0125759840011597, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0738, + "grad_norm": 1.2687044143676758, + "learning_rate": 1.109e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.372, + "grad_norm": 5.941206455230713, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.108, + "grad_norm": 2.1613714694976807, + "learning_rate": 1.107e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.0024, + "grad_norm": 0.39348432421684265, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.0639, + "grad_norm": 1.184023141860962, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0824, + "grad_norm": 1.9686490297317505, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0026, + "grad_norm": 0.44682711362838745, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0028, + "grad_norm": 0.49993517994880676, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0026, + "grad_norm": 0.4428325891494751, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0709, + "grad_norm": 1.2466169595718384, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0735, + "grad_norm": 1.3401033878326416, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0023, + "grad_norm": 0.3811323642730713, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0706, + "grad_norm": 1.4406594038009644, + "learning_rate": 1.097e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.054, + "grad_norm": 1.363612413406372, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0505, + "grad_norm": 1.161858320236206, + "learning_rate": 1.095e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.0022, + "grad_norm": 0.3702404797077179, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0023, + "grad_norm": 0.39905861020088196, + "learning_rate": 1.093e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0654, + "grad_norm": 1.083019733428955, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0613, + "grad_norm": 1.1142648458480835, + "learning_rate": 1.091e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0526, + "grad_norm": 1.24055016040802, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0687, + "grad_norm": 1.400773525238037, + "learning_rate": 1.089e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0462, + "grad_norm": 1.1053345203399658, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0574, + "grad_norm": 1.0202289819717407, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1215, + "grad_norm": 2.0495526790618896, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0603, + "grad_norm": 0.9297711253166199, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0073, + "grad_norm": 1.4618480205535889, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0491, + "grad_norm": 1.1468454599380493, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.07, + "grad_norm": 1.5984728336334229, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0097, + "grad_norm": 1.7861182689666748, + "learning_rate": 1.081e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0098, + "grad_norm": 1.7681940793991089, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0086, + "grad_norm": 1.6711666584014893, + "learning_rate": 1.079e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0431, + "grad_norm": 1.0142930746078491, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0417, + "grad_norm": 0.9444635510444641, + "learning_rate": 1.077e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0054, + "grad_norm": 1.0890287160873413, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0045, + "grad_norm": 0.9186440706253052, + "learning_rate": 1.075e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265022158622742, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0426, + "grad_norm": 1.0279744863510132, + "learning_rate": 1.073e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0693, + "grad_norm": 1.372605323791504, + "learning_rate": 1.072e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0017, + "grad_norm": 0.21290767192840576, + "learning_rate": 1.071e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0015, + "grad_norm": 0.17253448069095612, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.0526, + "grad_norm": 1.160703182220459, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0727, + "grad_norm": 1.2380679845809937, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1214, + "grad_norm": 2.0913727283477783, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0506, + "grad_norm": 1.0945791006088257, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.075, + "grad_norm": 1.382978916168213, + "learning_rate": 1.065e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0015, + "grad_norm": 0.172458216547966, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0742, + "grad_norm": 1.5439574718475342, + "learning_rate": 1.063e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0875, + "grad_norm": 1.514559030532837, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1175, + "grad_norm": 2.566283941268921, + "learning_rate": 1.061e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.22718015313148499, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0662, + "grad_norm": 1.2446449995040894, + "learning_rate": 1.059e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0023, + "grad_norm": 0.32198604941368103, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1204, + "grad_norm": 3.195101261138916, + "learning_rate": 1.057e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0647, + "grad_norm": 1.3185839653015137, + "learning_rate": 1.056e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0025, + "grad_norm": 0.3570478856563568, + "learning_rate": 1.055e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0692, + "grad_norm": 1.1017460823059082, + "learning_rate": 1.054e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0583, + "grad_norm": 1.167201042175293, + "learning_rate": 1.053e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1038, + "grad_norm": 2.155097723007202, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0038, + "grad_norm": 0.646456778049469, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0751, + "grad_norm": 1.3510818481445312, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1132, + "grad_norm": 2.1775286197662354, + "learning_rate": 1.049e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1073, + "grad_norm": 2.2072458267211914, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0048, + "grad_norm": 0.8271514177322388, + "learning_rate": 1.047e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0679, + "grad_norm": 1.0402039289474487, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0045, + "grad_norm": 0.7622825503349304, + "learning_rate": 1.045e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0538, + "grad_norm": 1.2865958213806152, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0656, + "grad_norm": 1.024865746498108, + "learning_rate": 1.043e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0039, + "grad_norm": 0.6565131545066833, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0043, + "grad_norm": 0.7380317449569702, + "learning_rate": 1.041e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0035, + "grad_norm": 0.570799708366394, + "learning_rate": 1.04e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.062, + "grad_norm": 1.1511563062667847, + "learning_rate": 1.039e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0695, + "grad_norm": 1.2906415462493896, + "learning_rate": 1.038e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0468, + "grad_norm": 1.2258033752441406, + "learning_rate": 1.037e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0024, + "grad_norm": 0.3688075542449951, + "learning_rate": 1.036e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0023, + "grad_norm": 0.3373582065105438, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.0709, + "grad_norm": 2.084989309310913, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.002, + "grad_norm": 0.27264249324798584, + "learning_rate": 1.033e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0018, + "grad_norm": 0.24489571154117584, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0799, + "grad_norm": 1.8190633058547974, + "learning_rate": 1.031e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0836, + "grad_norm": 1.4041454792022705, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1136, + "grad_norm": 2.274580240249634, + "learning_rate": 1.029e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0424, + "grad_norm": 1.3687119483947754, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0015, + "grad_norm": 0.16964252293109894, + "learning_rate": 1.027e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.0698, + "grad_norm": 1.1283705234527588, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0018, + "grad_norm": 0.22557133436203003, + "learning_rate": 1.025e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.21104346215724945, + "learning_rate": 1.024e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.0018, + "grad_norm": 0.24475614726543427, + "learning_rate": 1.023e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.0563, + "grad_norm": 2.955718755722046, + "learning_rate": 1.022e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0017, + "grad_norm": 0.24137888848781586, + "learning_rate": 1.021e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0017, + "grad_norm": 0.22060562670230865, + "learning_rate": 1.02e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0723, + "grad_norm": 1.5680960416793823, + "learning_rate": 1.019e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0016, + "grad_norm": 0.2214270681142807, + "learning_rate": 1.018e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.216565802693367, + "learning_rate": 1.017e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0684, + "grad_norm": 1.214136004447937, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1141, + "grad_norm": 2.0787954330444336, + "learning_rate": 1.015e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0015, + "grad_norm": 0.1908382773399353, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.0684, + "grad_norm": 0.9953256845474243, + "learning_rate": 1.013e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1151, + "grad_norm": 2.989778518676758, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0015, + "grad_norm": 0.1622181534767151, + "learning_rate": 1.011e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0015, + "grad_norm": 0.19451792538166046, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0015, + "grad_norm": 0.17583484947681427, + "learning_rate": 1.009e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.0971, + "grad_norm": 2.013803482055664, + "learning_rate": 1.008e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0015, + "grad_norm": 0.17960964143276215, + "learning_rate": 1.007e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0015, + "grad_norm": 0.18522843718528748, + "learning_rate": 1.006e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.071, + "grad_norm": 1.612250804901123, + "learning_rate": 1.005e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0789, + "grad_norm": 1.4309505224227905, + "learning_rate": 1.004e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0749, + "grad_norm": 1.3195449113845825, + "learning_rate": 1.003e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0685, + "grad_norm": 2.325835943222046, + "learning_rate": 1.002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0454, + "grad_norm": 1.1207916736602783, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.0018, + "grad_norm": 0.25914737582206726, + "learning_rate": 1e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0022, + "grad_norm": 0.35625582933425903, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.002, + "grad_norm": 0.3242781162261963, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3145410120487213, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0021, + "grad_norm": 0.33488088846206665, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0019, + "grad_norm": 0.2918454706668854, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0728, + "grad_norm": 1.2409576177597046, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.072, + "grad_norm": 1.2893600463867188, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.043, + "grad_norm": 1.1790004968643188, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0602, + "grad_norm": 1.1076241731643677, + "learning_rate": 9.91e-06, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0017, + "grad_norm": 0.2319565713405609, + "learning_rate": 9.9e-06, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0573, + "grad_norm": 2.263990879058838, + "learning_rate": 9.89e-06, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0018, + "grad_norm": 0.27414289116859436, + "learning_rate": 9.88e-06, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.068, + "grad_norm": 1.3204398155212402, + "learning_rate": 9.87e-06, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0021, + "grad_norm": 0.33790865540504456, + "learning_rate": 9.86e-06, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.002, + "grad_norm": 0.3250488340854645, + "learning_rate": 9.85e-06, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0614, + "grad_norm": 1.4563555717468262, + "learning_rate": 9.84e-06, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0499, + "grad_norm": 3.906182289123535, + "learning_rate": 9.83e-06, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1039, + "grad_norm": 2.9131107330322266, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1067, + "grad_norm": 3.119446039199829, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.0023, + "grad_norm": 0.3656690716743469, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0647, + "grad_norm": 1.234238862991333, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0612, + "grad_norm": 1.0838911533355713, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0668, + "grad_norm": 1.8563507795333862, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0027, + "grad_norm": 0.447256475687027, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0029, + "grad_norm": 0.4668635427951813, + "learning_rate": 9.75e-06, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0027, + "grad_norm": 0.45568251609802246, + "learning_rate": 9.74e-06, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0029, + "grad_norm": 0.5207828283309937, + "learning_rate": 9.73e-06, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0023, + "grad_norm": 0.3548046946525574, + "learning_rate": 9.72e-06, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.0022, + "grad_norm": 0.3339339792728424, + "learning_rate": 9.71e-06, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0449, + "grad_norm": 1.344630479812622, + "learning_rate": 9.7e-06, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0627, + "grad_norm": 1.3697110414505005, + "learning_rate": 9.69e-06, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0631, + "grad_norm": 1.4324746131896973, + "learning_rate": 9.68e-06, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0598, + "grad_norm": 1.1418583393096924, + "learning_rate": 9.67e-06, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0703, + "grad_norm": 1.3187053203582764, + "learning_rate": 9.66e-06, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0674, + "grad_norm": 1.5415701866149902, + "learning_rate": 9.65e-06, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0022, + "grad_norm": 0.5410366654396057, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0472, + "grad_norm": 1.4691059589385986, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0714, + "grad_norm": 1.8328925371170044, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0502, + "grad_norm": 1.4959746599197388, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770292103290558, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.0638, + "grad_norm": 1.2776446342468262, + "learning_rate": 9.59e-06, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0437, + "grad_norm": 1.0079017877578735, + "learning_rate": 9.58e-06, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0628, + "grad_norm": 1.1776297092437744, + "learning_rate": 9.57e-06, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0444, + "grad_norm": 1.2560832500457764, + "learning_rate": 9.56e-06, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0658, + "grad_norm": 1.9305787086486816, + "learning_rate": 9.55e-06, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0673, + "grad_norm": 1.5484907627105713, + "learning_rate": 9.54e-06, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0603, + "grad_norm": 1.2816107273101807, + "learning_rate": 9.53e-06, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0071, + "grad_norm": 1.2031859159469604, + "learning_rate": 9.52e-06, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0446, + "grad_norm": 1.0432018041610718, + "learning_rate": 9.51e-06, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0082, + "grad_norm": 1.3467326164245605, + "learning_rate": 9.5e-06, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.044, + "grad_norm": 1.1683317422866821, + "learning_rate": 9.49e-06, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.007, + "grad_norm": 1.1747612953186035, + "learning_rate": 9.48e-06, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0623, + "grad_norm": 1.1376299858093262, + "learning_rate": 9.47e-06, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0711, + "grad_norm": 1.2417066097259521, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0052, + "grad_norm": 0.9077128171920776, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0053, + "grad_norm": 0.951680600643158, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0467, + "grad_norm": 1.1328734159469604, + "learning_rate": 9.43e-06, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0036, + "grad_norm": 0.6388375163078308, + "learning_rate": 9.42e-06, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0713, + "grad_norm": 1.098759651184082, + "learning_rate": 9.41e-06, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0024, + "grad_norm": 0.3749485909938812, + "learning_rate": 9.4e-06, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.078, + "grad_norm": 1.4193601608276367, + "learning_rate": 9.39e-06, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0021, + "grad_norm": 0.29766610264778137, + "learning_rate": 9.38e-06, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0019, + "grad_norm": 0.2773911952972412, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0016, + "grad_norm": 0.19664674997329712, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0994, + "grad_norm": 2.1268746852874756, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.0476, + "grad_norm": 1.1297088861465454, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0518, + "grad_norm": 1.1052606105804443, + "learning_rate": 9.33e-06, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0483, + "grad_norm": 1.1215248107910156, + "learning_rate": 9.32e-06, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0595, + "grad_norm": 1.192276120185852, + "learning_rate": 9.31e-06, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1127, + "grad_norm": 2.282710552215576, + "learning_rate": 9.3e-06, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0014, + "grad_norm": 0.18352188169956207, + "learning_rate": 9.29e-06, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0668, + "grad_norm": 1.2716619968414307, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1147, + "grad_norm": 2.7008156776428223, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.1018, + "grad_norm": 2.031930446624756, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.002, + "grad_norm": 0.2863346338272095, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0471, + "grad_norm": 1.2682809829711914, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.002, + "grad_norm": 0.30941078066825867, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0024, + "grad_norm": 0.3932475745677948, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0632, + "grad_norm": 1.0679800510406494, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0663, + "grad_norm": 1.3005118370056152, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0423, + "grad_norm": 1.1240161657333374, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0029, + "grad_norm": 0.4581877887248993, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0029, + "grad_norm": 0.47186893224716187, + "learning_rate": 9.17e-06, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0032, + "grad_norm": 0.5238748788833618, + "learning_rate": 9.16e-06, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0028, + "grad_norm": 0.4411686062812805, + "learning_rate": 9.15e-06, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0026, + "grad_norm": 0.40239110589027405, + "learning_rate": 9.14e-06, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0023, + "grad_norm": 0.3315543234348297, + "learning_rate": 9.13e-06, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0021, + "grad_norm": 0.2885858416557312, + "learning_rate": 9.12e-06, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.073, + "grad_norm": 1.8177210092544556, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.0966, + "grad_norm": 1.7291756868362427, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0016, + "grad_norm": 0.19609428942203522, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0496, + "grad_norm": 1.1353715658187866, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0015, + "grad_norm": 0.17373698949813843, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.0441, + "grad_norm": 1.0672266483306885, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0014, + "grad_norm": 0.154168039560318, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0737, + "grad_norm": 1.3493475914001465, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.14875750243663788, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0012, + "grad_norm": 0.13037247955799103, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0012, + "grad_norm": 0.12503254413604736, + "learning_rate": 9.01e-06, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12820948660373688, + "learning_rate": 9e-06, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.0885, + "grad_norm": 1.8362265825271606, + "learning_rate": 8.99e-06, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.12838858366012573, + "learning_rate": 8.98e-06, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0495, + "grad_norm": 1.446435809135437, + "learning_rate": 8.97e-06, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.07, + "grad_norm": 1.1417546272277832, + "learning_rate": 8.96e-06, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0679, + "grad_norm": 1.1534578800201416, + "learning_rate": 8.95e-06, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.0556, + "grad_norm": 1.263162612915039, + "learning_rate": 8.94e-06, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0687, + "grad_norm": 1.441730260848999, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0561, + "grad_norm": 0.989497721195221, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0508, + "grad_norm": 1.1718560457229614, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0436, + "grad_norm": 1.1105691194534302, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0574, + "grad_norm": 1.159988522529602, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0028, + "grad_norm": 0.5130383968353271, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0703, + "grad_norm": 1.8314932584762573, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0389, + "grad_norm": 0.7763837575912476, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0648, + "grad_norm": 1.4212884902954102, + "learning_rate": 8.85e-06, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0467, + "grad_norm": 1.0347092151641846, + "learning_rate": 8.84e-06, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0574, + "grad_norm": 0.9852561950683594, + "learning_rate": 8.83e-06, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0443, + "grad_norm": 1.2871586084365845, + "learning_rate": 8.82e-06, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0497, + "grad_norm": 1.0900676250457764, + "learning_rate": 8.81e-06, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0094, + "grad_norm": 1.5167303085327148, + "learning_rate": 8.8e-06, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0091, + "grad_norm": 1.4984208345413208, + "learning_rate": 8.79e-06, + "num_tokens": 381077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0087, + "grad_norm": 1.4189144372940063, + "learning_rate": 8.78e-06, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0711, + "grad_norm": 1.5254539251327515, + "learning_rate": 8.77e-06, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0559, + "grad_norm": 0.9745803475379944, + "learning_rate": 8.76e-06, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0487, + "grad_norm": 0.9314166307449341, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.0985, + "grad_norm": 1.935889482498169, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.0884, + "grad_norm": 2.4487457275390625, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0417, + "grad_norm": 1.0779677629470825, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0071, + "grad_norm": 1.1962640285491943, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0412, + "grad_norm": 1.0417979955673218, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.0064, + "grad_norm": 1.0799331665039062, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0061, + "grad_norm": 1.0343092679977417, + "learning_rate": 8.68e-06, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0516, + "grad_norm": 1.2088981866836548, + "learning_rate": 8.67e-06, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0645, + "grad_norm": 1.4574052095413208, + "learning_rate": 8.66e-06, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0608, + "grad_norm": 1.5976455211639404, + "learning_rate": 8.65e-06, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0034, + "grad_norm": 0.562424898147583, + "learning_rate": 8.64e-06, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0031, + "grad_norm": 0.5184334516525269, + "learning_rate": 8.63e-06, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0538, + "grad_norm": 1.175452709197998, + "learning_rate": 8.62e-06, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0457, + "grad_norm": 1.0699386596679688, + "learning_rate": 8.61e-06, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0409, + "grad_norm": 1.2275623083114624, + "learning_rate": 8.6e-06, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0024, + "grad_norm": 0.36210763454437256, + "learning_rate": 8.59e-06, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0506, + "grad_norm": 1.1862293481826782, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0417, + "grad_norm": 1.0955649614334106, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0021, + "grad_norm": 0.3166447579860687, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0021, + "grad_norm": 0.3213079571723938, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.002, + "grad_norm": 0.29460856318473816, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0018, + "grad_norm": 0.2646322250366211, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.0962, + "grad_norm": 1.9064080715179443, + "learning_rate": 8.52e-06, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0018, + "grad_norm": 0.26078224182128906, + "learning_rate": 8.51e-06, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.22155798971652985, + "learning_rate": 8.5e-06, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0462, + "grad_norm": 1.282672643661499, + "learning_rate": 8.49e-06, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0615, + "grad_norm": 1.0272878408432007, + "learning_rate": 8.48e-06, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0391, + "grad_norm": 1.081066370010376, + "learning_rate": 8.47e-06, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0016, + "grad_norm": 0.2022254467010498, + "learning_rate": 8.46e-06, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0647, + "grad_norm": 1.203537106513977, + "learning_rate": 8.45e-06, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0411, + "grad_norm": 1.3823119401931763, + "learning_rate": 8.44e-06, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0017, + "grad_norm": 0.23678964376449585, + "learning_rate": 8.43e-06, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0498, + "grad_norm": 1.1035040616989136, + "learning_rate": 8.42e-06, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0019, + "grad_norm": 0.2826336622238159, + "learning_rate": 8.41e-06, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0018, + "grad_norm": 0.26219162344932556, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0369, + "grad_norm": 0.8924168944358826, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.2968710660934448, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0655, + "grad_norm": 1.4359571933746338, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0793, + "grad_norm": 1.4873827695846558, + "learning_rate": 8.36e-06, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0022, + "grad_norm": 0.3399635851383209, + "learning_rate": 8.35e-06, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0388, + "grad_norm": 1.2504096031188965, + "learning_rate": 8.34e-06, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0022, + "grad_norm": 0.34148266911506653, + "learning_rate": 8.33e-06, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0022, + "grad_norm": 0.33662110567092896, + "learning_rate": 8.32e-06, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.0022, + "grad_norm": 0.324468731880188, + "learning_rate": 8.31e-06, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.1031, + "grad_norm": 1.776872992515564, + "learning_rate": 8.3e-06, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0019, + "grad_norm": 0.27522948384284973, + "learning_rate": 8.29e-06, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0625, + "grad_norm": 1.0583921670913696, + "learning_rate": 8.28e-06, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.002, + "grad_norm": 0.2976676821708679, + "learning_rate": 8.27e-06, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0428, + "grad_norm": 1.0262646675109863, + "learning_rate": 8.26e-06, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.0569, + "grad_norm": 1.088004469871521, + "learning_rate": 8.25e-06, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0617, + "grad_norm": 1.422031044960022, + "learning_rate": 8.24e-06, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0705, + "grad_norm": 1.1122493743896484, + "learning_rate": 8.23e-06, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0023, + "grad_norm": 0.3706248998641968, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0548, + "grad_norm": 1.159569501876831, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0027, + "grad_norm": 0.44550517201423645, + "learning_rate": 8.2e-06, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0505, + "grad_norm": 1.0908255577087402, + "learning_rate": 8.19e-06, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0429, + "grad_norm": 0.9888002276420593, + "learning_rate": 8.18e-06, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.039, + "grad_norm": 1.1269707679748535, + "learning_rate": 8.17e-06, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0547, + "grad_norm": 2.2459864616394043, + "learning_rate": 8.16e-06, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0648, + "grad_norm": 1.141405463218689, + "learning_rate": 8.15e-06, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0036, + "grad_norm": 0.6154343485832214, + "learning_rate": 8.14e-06, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0037, + "grad_norm": 0.607581377029419, + "learning_rate": 8.13e-06, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.041, + "grad_norm": 1.0139696598052979, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0548, + "grad_norm": 1.2063956260681152, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0546, + "grad_norm": 1.0185149908065796, + "learning_rate": 8.1e-06, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0846, + "grad_norm": 1.5638638734817505, + "learning_rate": 8.09e-06, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0397, + "grad_norm": 0.9592515826225281, + "learning_rate": 8.08e-06, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0732, + "grad_norm": 2.417308807373047, + "learning_rate": 8.07e-06, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0397, + "grad_norm": 1.0397586822509766, + "learning_rate": 8.06e-06, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0539, + "grad_norm": 1.0043741464614868, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0064, + "grad_norm": 1.0331615209579468, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.3439, + "grad_norm": 7.151169776916504, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.3186, + "grad_norm": 6.194533348083496, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0373780727386475, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0693, + "grad_norm": 1.3804030418395996, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0063, + "grad_norm": 1.0356889963150024, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0063, + "grad_norm": 1.025659203529358, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1028, + "grad_norm": 2.4993162155151367, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0765, + "grad_norm": 1.528414011001587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0039, + "grad_norm": 0.6606444120407104, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.1021, + "grad_norm": 1.9298466444015503, + "learning_rate": 7.94e-06, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0658, + "grad_norm": 1.2403901815414429, + "learning_rate": 7.93e-06, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0901, + "grad_norm": 2.676560878753662, + "learning_rate": 7.92e-06, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0025, + "grad_norm": 0.3969874083995819, + "learning_rate": 7.91e-06, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0022, + "grad_norm": 0.3410389721393585, + "learning_rate": 7.9e-06, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.0467, + "grad_norm": 1.2688374519348145, + "learning_rate": 7.89e-06, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0906, + "grad_norm": 1.5839786529541016, + "learning_rate": 7.88e-06, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0808, + "grad_norm": 1.8329588174819946, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0678, + "grad_norm": 1.438069462776184, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0675, + "grad_norm": 1.4430946111679077, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0019, + "grad_norm": 0.29633986949920654, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0494, + "grad_norm": 1.1387202739715576, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0021, + "grad_norm": 0.32885608077049255, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.0862, + "grad_norm": 2.407383680343628, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0614, + "grad_norm": 1.1128315925598145, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0022, + "grad_norm": 0.3651196360588074, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0648, + "grad_norm": 1.3287708759307861, + "learning_rate": 7.78e-06, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.0023, + "grad_norm": 0.3838794231414795, + "learning_rate": 7.77e-06, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0684, + "grad_norm": 1.4677760601043701, + "learning_rate": 7.76e-06, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0024, + "grad_norm": 0.42079463601112366, + "learning_rate": 7.75e-06, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0024, + "grad_norm": 0.42147955298423767, + "learning_rate": 7.74e-06, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0441, + "grad_norm": 1.1677274703979492, + "learning_rate": 7.73e-06, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0492, + "grad_norm": 1.4035431146621704, + "learning_rate": 7.72e-06, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0671, + "grad_norm": 1.9446959495544434, + "learning_rate": 7.71e-06, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0025, + "grad_norm": 0.4543871581554413, + "learning_rate": 7.7e-06, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.042, + "grad_norm": 1.1771857738494873, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0679, + "grad_norm": 1.3713475465774536, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0026, + "grad_norm": 0.47350987792015076, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0633, + "grad_norm": 1.3524508476257324, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0637, + "grad_norm": 1.2763797044754028, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0902, + "grad_norm": 1.6739592552185059, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0031, + "grad_norm": 0.5534782409667969, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.0501, + "grad_norm": 1.3401867151260376, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.046, + "grad_norm": 1.1883294582366943, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0466, + "grad_norm": 1.101483941078186, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.071, + "grad_norm": 1.3334777355194092, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0558, + "grad_norm": 1.267762541770935, + "learning_rate": 7.58e-06, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0658, + "grad_norm": 1.4283661842346191, + "learning_rate": 7.57e-06, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0411, + "grad_norm": 0.9805395007133484, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0766, + "grad_norm": 1.4888850450515747, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0055, + "grad_norm": 0.9557706713676453, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0054, + "grad_norm": 0.9585487842559814, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0538, + "grad_norm": 1.1800369024276733, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0051, + "grad_norm": 0.8553330898284912, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0629, + "grad_norm": 1.230909824371338, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.068, + "grad_norm": 1.453507900238037, + "learning_rate": 7.49e-06, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0427, + "grad_norm": 0.9869980812072754, + "learning_rate": 7.48e-06, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1017, + "grad_norm": 2.1453680992126465, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0042, + "grad_norm": 0.7140144109725952, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.0616, + "grad_norm": 1.021086573600769, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0434, + "grad_norm": 1.1894596815109253, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0862, + "grad_norm": 2.159723997116089, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.0429, + "grad_norm": 1.066892147064209, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0572, + "grad_norm": 1.0095235109329224, + "learning_rate": 7.41e-06, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.054, + "grad_norm": 1.2086626291275024, + "learning_rate": 7.4e-06, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0046, + "grad_norm": 0.7741432189941406, + "learning_rate": 7.39e-06, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0047, + "grad_norm": 0.7828612923622131, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0045, + "grad_norm": 0.7598645687103271, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0046, + "grad_norm": 0.7734522819519043, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.057, + "grad_norm": 1.0973255634307861, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.065, + "grad_norm": 1.709967017173767, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0931, + "grad_norm": 2.1337525844573975, + "learning_rate": 7.33e-06, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0028, + "grad_norm": 0.4441553056240082, + "learning_rate": 7.32e-06, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0805, + "grad_norm": 3.2075629234313965, + "learning_rate": 7.31e-06, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0026, + "grad_norm": 0.4167421758174896, + "learning_rate": 7.3e-06, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0023, + "grad_norm": 0.35469523072242737, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0021, + "grad_norm": 0.31768423318862915, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0441, + "grad_norm": 0.9787921905517578, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0019, + "grad_norm": 0.2729261517524719, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0016, + "grad_norm": 0.21043084561824799, + "learning_rate": 7.25e-06, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0015, + "grad_norm": 0.1971331685781479, + "learning_rate": 7.24e-06, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0802, + "grad_norm": 1.84896719455719, + "learning_rate": 7.23e-06, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.0687, + "grad_norm": 1.369922399520874, + "learning_rate": 7.22e-06, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0014, + "grad_norm": 0.16199085116386414, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0013, + "grad_norm": 0.14561891555786133, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0762, + "grad_norm": 2.150111436843872, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.0011, + "grad_norm": 0.12219979614019394, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0409, + "grad_norm": 1.0275540351867676, + "learning_rate": 7.17e-06, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0622, + "grad_norm": 1.3782963752746582, + "learning_rate": 7.16e-06, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0942, + "grad_norm": 2.0990819931030273, + "learning_rate": 7.15e-06, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0556, + "grad_norm": 1.1607019901275635, + "learning_rate": 7.14e-06, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0012, + "grad_norm": 0.14383459091186523, + "learning_rate": 7.13e-06, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0443, + "grad_norm": 1.0032017230987549, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0014, + "grad_norm": 0.18446141481399536, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0014, + "grad_norm": 0.19693079590797424, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.0486, + "grad_norm": 1.2597516775131226, + "learning_rate": 7.09e-06, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0014, + "grad_norm": 0.1964249163866043, + "learning_rate": 7.08e-06, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0015, + "grad_norm": 0.21462222933769226, + "learning_rate": 7.07e-06, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0508, + "grad_norm": 1.3977996110916138, + "learning_rate": 7.06e-06, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0828, + "grad_norm": 1.5659841299057007, + "learning_rate": 7.05e-06, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0603, + "grad_norm": 1.602921724319458, + "learning_rate": 7.04e-06, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0744, + "grad_norm": 2.2317163944244385, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0561, + "grad_norm": 2.125541925430298, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.3173121213912964, + "learning_rate": 7.01e-06, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0459, + "grad_norm": 1.2071703672409058, + "learning_rate": 7e-06, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0432, + "grad_norm": 1.2934582233428955, + "learning_rate": 6.99e-06, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0489, + "grad_norm": 1.1334161758422852, + "learning_rate": 6.98e-06, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0579, + "grad_norm": 0.9369598627090454, + "learning_rate": 6.97e-06, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0033, + "grad_norm": 0.5776845812797546, + "learning_rate": 6.96e-06, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0569, + "grad_norm": 1.3031799793243408, + "learning_rate": 6.95e-06, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0037, + "grad_norm": 0.6248667240142822, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0032, + "grad_norm": 0.5299662947654724, + "learning_rate": 6.93e-06, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0667, + "grad_norm": 1.8433657884597778, + "learning_rate": 6.92e-06, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0577, + "grad_norm": 1.1226876974105835, + "learning_rate": 6.91e-06, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.0567, + "grad_norm": 1.1603243350982666, + "learning_rate": 6.9e-06, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0032, + "grad_norm": 0.5435492992401123, + "learning_rate": 6.89e-06, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0606, + "grad_norm": 0.9929336905479431, + "learning_rate": 6.88e-06, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0036, + "grad_norm": 0.6169335842132568, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0649, + "grad_norm": 1.2230188846588135, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0613, + "grad_norm": 1.0680222511291504, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.0455, + "grad_norm": 1.529793620109558, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0036, + "grad_norm": 0.614677906036377, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.074, + "grad_norm": 2.1550259590148926, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0541, + "grad_norm": 0.9593685269355774, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.0036, + "grad_norm": 0.5768935084342957, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0465, + "grad_norm": 1.2158730030059814, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0438, + "grad_norm": 1.1586334705352783, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0444, + "grad_norm": 1.4859849214553833, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0403, + "grad_norm": 1.1270227432250977, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.004, + "grad_norm": 0.6430424451828003, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.0906, + "grad_norm": 1.5925347805023193, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0422, + "grad_norm": 0.9977685213088989, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0564, + "grad_norm": 1.1696628332138062, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0518, + "grad_norm": 0.9724094271659851, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0047, + "grad_norm": 0.7779951095581055, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0043, + "grad_norm": 0.7115391492843628, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.3534, + "grad_norm": 6.629246234893799, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0038, + "grad_norm": 0.6219172477722168, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0041, + "grad_norm": 0.6817074418067932, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0601, + "grad_norm": 1.2284682989120483, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0585, + "grad_norm": 1.3272614479064941, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0417, + "grad_norm": 0.929707944393158, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.0768, + "grad_norm": 1.2148957252502441, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.003, + "grad_norm": 0.4916832149028778, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0659, + "grad_norm": 1.1595323085784912, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0669, + "grad_norm": 1.3607900142669678, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0843, + "grad_norm": 2.730896472930908, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0587, + "grad_norm": 1.2983198165893555, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.0675, + "grad_norm": 1.475829839706421, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0034, + "grad_norm": 0.569835364818573, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0031, + "grad_norm": 0.5171738862991333, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0032, + "grad_norm": 0.5472842454910278, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0029, + "grad_norm": 0.4868464767932892, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0616, + "grad_norm": 1.1753767728805542, + "learning_rate": 6.51e-06, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.05, + "grad_norm": 1.306359052658081, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.0027, + "grad_norm": 0.4471572935581207, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0535, + "grad_norm": 1.1857725381851196, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0023, + "grad_norm": 0.39148810505867004, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0021, + "grad_norm": 0.3375743329524994, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0601, + "grad_norm": 3.349716901779175, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.077, + "grad_norm": 1.3602453470230103, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0482, + "grad_norm": 1.1098014116287231, + "learning_rate": 6.43e-06, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0019, + "grad_norm": 0.3053341507911682, + "learning_rate": 6.42e-06, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0019, + "grad_norm": 0.3125056326389313, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0019, + "grad_norm": 0.28826457262039185, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0652, + "grad_norm": 1.4113070964813232, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0467, + "grad_norm": 1.2754263877868652, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0017, + "grad_norm": 0.2621810734272003, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0658, + "grad_norm": 1.0557119846343994, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0567, + "grad_norm": 1.4838411808013916, + "learning_rate": 6.35e-06, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.26117855310440063, + "learning_rate": 6.34e-06, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.1064739227294922, + "learning_rate": 6.33e-06, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0435, + "grad_norm": 1.063262939453125, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.066, + "grad_norm": 1.1504032611846924, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0641, + "grad_norm": 1.203201174736023, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0585, + "grad_norm": 1.2477880716323853, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0025, + "grad_norm": 0.4655078947544098, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0602, + "grad_norm": 1.341115951538086, + "learning_rate": 6.27e-06, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0904, + "grad_norm": 2.366762399673462, + "learning_rate": 6.26e-06, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0033, + "grad_norm": 0.6076349020004272, + "learning_rate": 6.25e-06, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0907, + "grad_norm": 1.9339498281478882, + "learning_rate": 6.24e-06, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0864, + "grad_norm": 1.780813217163086, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0033, + "grad_norm": 0.6028679609298706, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0542, + "grad_norm": 1.0088207721710205, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0758, + "grad_norm": 1.5442019701004028, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0034, + "grad_norm": 0.6019788980484009, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.277, + "grad_norm": 5.171119689941406, + "learning_rate": 6.18e-06, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0036, + "grad_norm": 0.6451438665390015, + "learning_rate": 6.17e-06, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0037, + "grad_norm": 0.6643303036689758, + "learning_rate": 6.16e-06, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0034, + "grad_norm": 0.6205865740776062, + "learning_rate": 6.15e-06, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0029, + "grad_norm": 0.4953503906726837, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.0027, + "grad_norm": 0.46802619099617004, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0908, + "grad_norm": 1.535525918006897, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0417, + "grad_norm": 0.9248743653297424, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.002, + "grad_norm": 0.3165223300457001, + "learning_rate": 6.1e-06, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0542, + "grad_norm": 0.9654661417007446, + "learning_rate": 6.09e-06, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0692, + "grad_norm": 1.3097866773605347, + "learning_rate": 6.08e-06, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0701, + "grad_norm": 1.50612473487854, + "learning_rate": 6.07e-06, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0017, + "grad_norm": 0.2454281896352768, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0855, + "grad_norm": 1.9738035202026367, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0017, + "grad_norm": 0.2594867944717407, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0579, + "grad_norm": 1.1067945957183838, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0566, + "grad_norm": 1.0555428266525269, + "learning_rate": 6.02e-06, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0016, + "grad_norm": 0.24508465826511383, + "learning_rate": 6.01e-06, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0632, + "grad_norm": 1.3900046348571777, + "learning_rate": 6e-06, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0404, + "grad_norm": 0.9500136971473694, + "learning_rate": 5.99e-06, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0573, + "grad_norm": 1.2340861558914185, + "learning_rate": 5.98e-06, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.04, + "grad_norm": 1.035536527633667, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.064, + "grad_norm": 0.9856736660003662, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.2168488502502441, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.0819, + "grad_norm": 1.6233789920806885, + "learning_rate": 5.94e-06, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0644, + "grad_norm": 1.539711594581604, + "learning_rate": 5.93e-06, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0031, + "grad_norm": 0.5361098647117615, + "learning_rate": 5.92e-06, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0657, + "grad_norm": 1.5077885389328003, + "learning_rate": 5.91e-06, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0033, + "grad_norm": 0.5819950699806213, + "learning_rate": 5.9e-06, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0844, + "grad_norm": 1.6911466121673584, + "learning_rate": 5.89e-06, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 0.909106969833374, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0046, + "grad_norm": 0.8148921132087708, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0603, + "grad_norm": 1.50859797000885, + "learning_rate": 5.86e-06, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0041, + "grad_norm": 0.7295659780502319, + "learning_rate": 5.85e-06, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.0532, + "grad_norm": 1.1242952346801758, + "learning_rate": 5.84e-06, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0544, + "grad_norm": 0.9595649838447571, + "learning_rate": 5.83e-06, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0042, + "grad_norm": 0.7197695374488831, + "learning_rate": 5.82e-06, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0637, + "grad_norm": 1.327078938484192, + "learning_rate": 5.81e-06, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0515, + "grad_norm": 1.3836802244186401, + "learning_rate": 5.8e-06, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0471, + "grad_norm": 2.055051326751709, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0634, + "grad_norm": 1.3304088115692139, + "learning_rate": 5.78e-06, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0042, + "grad_norm": 0.7247684597969055, + "learning_rate": 5.77e-06, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0042, + "grad_norm": 0.7230411767959595, + "learning_rate": 5.76e-06, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0802, + "grad_norm": 1.942260980606079, + "learning_rate": 5.75e-06, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0408, + "grad_norm": 0.9843087792396545, + "learning_rate": 5.74e-06, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0037, + "grad_norm": 0.6149731278419495, + "learning_rate": 5.73e-06, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0035, + "grad_norm": 0.591227114200592, + "learning_rate": 5.72e-06, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0034, + "grad_norm": 0.5716548562049866, + "learning_rate": 5.71e-06, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0028, + "grad_norm": 0.4706770181655884, + "learning_rate": 5.7e-06, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0023, + "grad_norm": 0.37091749906539917, + "learning_rate": 5.69e-06, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0592, + "grad_norm": 1.1389172077178955, + "learning_rate": 5.68e-06, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0021, + "grad_norm": 0.33143892884254456, + "learning_rate": 5.67e-06, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.068, + "grad_norm": 2.0014731884002686, + "learning_rate": 5.66e-06, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0433, + "grad_norm": 1.1497068405151367, + "learning_rate": 5.65e-06, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0017, + "grad_norm": 0.2540724575519562, + "learning_rate": 5.64e-06, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0403, + "grad_norm": 1.0868761539459229, + "learning_rate": 5.63e-06, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0015, + "grad_norm": 0.19899524748325348, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0455, + "grad_norm": 1.617480754852295, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0014, + "grad_norm": 0.19665531814098358, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0648, + "grad_norm": 1.622554898262024, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0014, + "grad_norm": 0.18810254335403442, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0701, + "grad_norm": 1.4964152574539185, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0013, + "grad_norm": 0.15776444971561432, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0012, + "grad_norm": 0.1539117842912674, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0013, + "grad_norm": 0.1636369377374649, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0012, + "grad_norm": 0.15004193782806396, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0012, + "grad_norm": 0.15097948908805847, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0012, + "grad_norm": 0.14485493302345276, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.047, + "grad_norm": 1.3281570672988892, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0519, + "grad_norm": 2.394688844680786, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.0012, + "grad_norm": 0.1376945525407791, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.13309122622013092, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0439, + "grad_norm": 1.0667738914489746, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.0012, + "grad_norm": 0.14376237988471985, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.13507920503616333, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0749, + "grad_norm": 1.5052191019058228, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0012, + "grad_norm": 0.14203152060508728, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0445, + "grad_norm": 1.228667974472046, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.0656, + "grad_norm": 1.407843828201294, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0647, + "grad_norm": 1.6894930601119995, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.14642253518104553, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0452, + "grad_norm": 1.07169508934021, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0013, + "grad_norm": 0.1761048138141632, + "learning_rate": 5.36e-06, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0849, + "grad_norm": 2.0752289295196533, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0425, + "grad_norm": 1.113696575164795, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0846, + "grad_norm": 1.7338367700576782, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0014, + "grad_norm": 0.1934671550989151, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0443, + "grad_norm": 1.1740210056304932, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0016, + "grad_norm": 0.221791610121727, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0419, + "grad_norm": 1.0604463815689087, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0018, + "grad_norm": 0.2774617373943329, + "learning_rate": 5.28e-06, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0715, + "grad_norm": 1.4584964513778687, + "learning_rate": 5.27e-06, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0424, + "grad_norm": 1.1874643564224243, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0681, + "grad_norm": 1.1877933740615845, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0574, + "grad_norm": 1.2860503196716309, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0024, + "grad_norm": 0.38671889901161194, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0778, + "grad_norm": 1.683851718902588, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0624, + "grad_norm": 1.148560643196106, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0026, + "grad_norm": 0.422258198261261, + "learning_rate": 5.2e-06, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0029, + "grad_norm": 0.48346948623657227, + "learning_rate": 5.19e-06, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.003, + "grad_norm": 0.4990505874156952, + "learning_rate": 5.18e-06, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0444, + "grad_norm": 1.1750332117080688, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0631, + "grad_norm": 1.0927088260650635, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0029, + "grad_norm": 0.491895854473114, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0029, + "grad_norm": 0.48604080080986023, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0646, + "grad_norm": 1.8152271509170532, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0905, + "grad_norm": 2.1916065216064453, + "learning_rate": 5.12e-06, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0441, + "grad_norm": 0.9943680167198181, + "learning_rate": 5.11e-06, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0028, + "grad_norm": 0.4724738299846649, + "learning_rate": 5.1e-06, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0455, + "grad_norm": 1.327681303024292, + "learning_rate": 5.09e-06, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0401, + "grad_norm": 1.00179922580719, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.2741, + "grad_norm": 5.871794700622559, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0028, + "grad_norm": 0.48077592253685, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0706, + "grad_norm": 1.4320826530456543, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.0435, + "grad_norm": 1.2258262634277344, + "learning_rate": 5.04e-06, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0031, + "grad_norm": 0.5447593331336975, + "learning_rate": 5.03e-06, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0408, + "grad_norm": 1.0005323886871338, + "learning_rate": 5.02e-06, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0031, + "grad_norm": 0.52440345287323, + "learning_rate": 5.01e-06, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0931, + "grad_norm": 2.2890543937683105, + "learning_rate": 5e-06, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0028, + "grad_norm": 0.47974297404289246, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0028, + "grad_norm": 0.4712013900279999, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0734, + "grad_norm": 1.7330412864685059, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.0412, + "grad_norm": 1.2318421602249146, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0577, + "grad_norm": 1.1624799966812134, + "learning_rate": 4.95e-06, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0667, + "grad_norm": 1.3667885065078735, + "learning_rate": 4.94e-06, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0472, + "grad_norm": 1.0038102865219116, + "learning_rate": 4.93e-06, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0662, + "grad_norm": 1.370149850845337, + "learning_rate": 4.92e-06, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.003, + "grad_norm": 0.4965730309486389, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0397, + "grad_norm": 0.9282152056694031, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0576, + "grad_norm": 1.0276484489440918, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0656, + "grad_norm": 1.319326400756836, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0636, + "grad_norm": 1.2873133420944214, + "learning_rate": 4.87e-06, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.0032, + "grad_norm": 0.5650099515914917, + "learning_rate": 4.86e-06, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0404, + "grad_norm": 1.389515995979309, + "learning_rate": 4.85e-06, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0036, + "grad_norm": 0.6158953309059143, + "learning_rate": 4.84e-06, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0823, + "grad_norm": 2.242391347885132, + "learning_rate": 4.83e-06, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0653, + "grad_norm": 1.5677355527877808, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0781, + "grad_norm": 2.0974771976470947, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0611, + "grad_norm": 1.4084426164627075, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0044, + "grad_norm": 0.7955360412597656, + "learning_rate": 4.79e-06, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0352, + "grad_norm": 0.9566419124603271, + "learning_rate": 4.78e-06, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0564, + "grad_norm": 0.9539786577224731, + "learning_rate": 4.77e-06, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0459, + "grad_norm": 1.0773917436599731, + "learning_rate": 4.76e-06, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.075, + "grad_norm": 2.423198938369751, + "learning_rate": 4.75e-06, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0044, + "grad_norm": 0.7832935452461243, + "learning_rate": 4.74e-06, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0661, + "grad_norm": 1.3831069469451904, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.0043, + "grad_norm": 0.7653414011001587, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0039, + "grad_norm": 0.7014725208282471, + "learning_rate": 4.71e-06, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0042, + "grad_norm": 0.7603307962417603, + "learning_rate": 4.7e-06, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0622, + "grad_norm": 1.3033061027526855, + "learning_rate": 4.69e-06, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0774, + "grad_norm": 2.0244553089141846, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0035, + "grad_norm": 0.6342400908470154, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0031, + "grad_norm": 0.5407992601394653, + "learning_rate": 4.66e-06, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0611, + "grad_norm": 1.2235374450683594, + "learning_rate": 4.65e-06, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0623, + "grad_norm": 1.3751453161239624, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0027, + "grad_norm": 0.4813397526741028, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.0664, + "grad_norm": 1.2894669771194458, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.056, + "grad_norm": 1.4559017419815063, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0775, + "grad_norm": 2.593362808227539, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.3138, + "grad_norm": 5.148370742797852, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0756, + "grad_norm": 2.2736735343933105, + "learning_rate": 4.58e-06, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.065, + "grad_norm": 3.2683534622192383, + "learning_rate": 4.57e-06, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0025, + "grad_norm": 0.44800934195518494, + "learning_rate": 4.56e-06, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.2697, + "grad_norm": 5.550428867340088, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0566, + "grad_norm": 1.0541280508041382, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0021, + "grad_norm": 0.3617427945137024, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0473, + "grad_norm": 1.3375787734985352, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0021, + "grad_norm": 0.33384522795677185, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0379, + "grad_norm": 1.0544806718826294, + "learning_rate": 4.5e-06, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0023, + "grad_norm": 0.39406508207321167, + "learning_rate": 4.49e-06, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0752, + "grad_norm": 1.9515206813812256, + "learning_rate": 4.48e-06, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.0023, + "grad_norm": 0.3835340738296509, + "learning_rate": 4.47e-06, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.059, + "grad_norm": 1.1221628189086914, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0021, + "grad_norm": 0.3509887456893921, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.064, + "grad_norm": 1.205573320388794, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0718, + "grad_norm": 2.1418721675872803, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0414, + "grad_norm": 1.3037139177322388, + "learning_rate": 4.42e-06, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.0736, + "grad_norm": 2.1680147647857666, + "learning_rate": 4.41e-06, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0021, + "grad_norm": 0.347339004278183, + "learning_rate": 4.4e-06, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0736, + "grad_norm": 2.0864803791046143, + "learning_rate": 4.39e-06, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0025, + "grad_norm": 0.4395049810409546, + "learning_rate": 4.38e-06, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0023, + "grad_norm": 0.39004504680633545, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0022, + "grad_norm": 0.36095598340034485, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0582, + "grad_norm": 1.2327930927276611, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0461, + "grad_norm": 1.040818452835083, + "learning_rate": 4.34e-06, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.248, + "grad_norm": 5.55968713760376, + "learning_rate": 4.33e-06, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0021, + "grad_norm": 0.33996713161468506, + "learning_rate": 4.32e-06, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0885, + "grad_norm": 1.9103176593780518, + "learning_rate": 4.31e-06, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0021, + "grad_norm": 0.3596363663673401, + "learning_rate": 4.3e-06, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0024, + "grad_norm": 0.38911113142967224, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.0575, + "grad_norm": 1.1043959856033325, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.0398, + "grad_norm": 1.0082714557647705, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.07, + "grad_norm": 1.312532901763916, + "learning_rate": 4.26e-06, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.0019, + "grad_norm": 0.314879834651947, + "learning_rate": 4.25e-06, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.32559505105018616, + "learning_rate": 4.24e-06, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0021, + "grad_norm": 0.3332079350948334, + "learning_rate": 4.23e-06, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0585, + "grad_norm": 1.1406902074813843, + "learning_rate": 4.22e-06, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0018, + "grad_norm": 0.2799522876739502, + "learning_rate": 4.21e-06, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0525, + "grad_norm": 1.1263917684555054, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0019, + "grad_norm": 0.28769129514694214, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.002, + "grad_norm": 0.3043234348297119, + "learning_rate": 4.18e-06, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0018, + "grad_norm": 0.2788783311843872, + "learning_rate": 4.17e-06, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.002, + "grad_norm": 0.3088054358959198, + "learning_rate": 4.16e-06, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.0382, + "grad_norm": 1.0789445638656616, + "learning_rate": 4.15e-06, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.0435, + "grad_norm": 1.0291471481323242, + "learning_rate": 4.14e-06, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0754, + "grad_norm": 1.4396899938583374, + "learning_rate": 4.13e-06, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.05, + "grad_norm": 1.1235865354537964, + "learning_rate": 4.12e-06, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0018, + "grad_norm": 0.2745732069015503, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0017, + "grad_norm": 0.2619018256664276, + "learning_rate": 4.1e-06, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.063, + "grad_norm": 1.068122148513794, + "learning_rate": 4.09e-06, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.076, + "grad_norm": 1.5099190473556519, + "learning_rate": 4.08e-06, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.075, + "grad_norm": 1.370004415512085, + "learning_rate": 4.07e-06, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.06, + "grad_norm": 1.2732493877410889, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.045, + "grad_norm": 1.2496861219406128, + "learning_rate": 4.05e-06, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0471, + "grad_norm": 1.1135365962982178, + "learning_rate": 4.04e-06, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0668, + "grad_norm": 1.5768578052520752, + "learning_rate": 4.03e-06, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0024, + "grad_norm": 0.3887575566768646, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0023, + "grad_norm": 0.3817980885505676, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.2858, + "grad_norm": 5.93766975402832, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757269084453583, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0611, + "grad_norm": 1.3149932622909546, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.085, + "grad_norm": 1.8090168237686157, + "learning_rate": 3.97e-06, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0624, + "grad_norm": 1.2021411657333374, + "learning_rate": 3.96e-06, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0597, + "grad_norm": 1.1230809688568115, + "learning_rate": 3.95e-06, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0521, + "grad_norm": 1.225655198097229, + "learning_rate": 3.94e-06, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0028, + "grad_norm": 0.4546661674976349, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.2426, + "grad_norm": 4.83814001083374, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0032, + "grad_norm": 0.5268356800079346, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.003, + "grad_norm": 0.5073143839836121, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0571, + "grad_norm": 1.12201988697052, + "learning_rate": 3.89e-06, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0027, + "grad_norm": 0.441703200340271, + "learning_rate": 3.88e-06, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.06, + "grad_norm": 1.055845022201538, + "learning_rate": 3.87e-06, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0026, + "grad_norm": 0.4252733290195465, + "learning_rate": 3.86e-06, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0654, + "grad_norm": 1.2097599506378174, + "learning_rate": 3.85e-06, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0031, + "grad_norm": 0.5153416395187378, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0412, + "grad_norm": 1.2524850368499756, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0603, + "grad_norm": 1.216737985610962, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0027, + "grad_norm": 0.4374849498271942, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0027, + "grad_norm": 0.45386913418769836, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0772, + "grad_norm": 2.3643293380737305, + "learning_rate": 3.79e-06, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0585, + "grad_norm": 1.1927247047424316, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0024, + "grad_norm": 0.4038313329219818, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0024, + "grad_norm": 0.3948758542537689, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0022, + "grad_norm": 0.36720144748687744, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0024, + "grad_norm": 0.3845508098602295, + "learning_rate": 3.74e-06, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0021, + "grad_norm": 0.33976465463638306, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0656, + "grad_norm": 1.0829418897628784, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0816, + "grad_norm": 1.7684704065322876, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0021, + "grad_norm": 0.3379213809967041, + "learning_rate": 3.7e-06, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0017, + "grad_norm": 0.268597275018692, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0571, + "grad_norm": 1.7145894765853882, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0017, + "grad_norm": 0.262333482503891, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0453, + "grad_norm": 1.0645833015441895, + "learning_rate": 3.66e-06, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0596, + "grad_norm": 1.364123821258545, + "learning_rate": 3.65e-06, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0472, + "grad_norm": 0.9277791380882263, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.062, + "grad_norm": 1.2970867156982422, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0486, + "grad_norm": 1.1752419471740723, + "learning_rate": 3.62e-06, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.067, + "grad_norm": 1.646427869796753, + "learning_rate": 3.61e-06, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.0488, + "grad_norm": 1.3798638582229614, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0585, + "grad_norm": 1.2615973949432373, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0536, + "grad_norm": 1.4801198244094849, + "learning_rate": 3.58e-06, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0021, + "grad_norm": 0.3402940332889557, + "learning_rate": 3.57e-06, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0506, + "grad_norm": 0.878396213054657, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0022, + "grad_norm": 0.37959179282188416, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0023, + "grad_norm": 0.39978647232055664, + "learning_rate": 3.54e-06, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.0692, + "grad_norm": 1.6479856967926025, + "learning_rate": 3.53e-06, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0022, + "grad_norm": 0.37655898928642273, + "learning_rate": 3.52e-06, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0547, + "grad_norm": 1.4809867143630981, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.038, + "grad_norm": 1.2819538116455078, + "learning_rate": 3.5e-06, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0437, + "grad_norm": 1.2474430799484253, + "learning_rate": 3.49e-06, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0611, + "grad_norm": 1.1493180990219116, + "learning_rate": 3.48e-06, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.062, + "grad_norm": 1.4344936609268188, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0027, + "grad_norm": 0.501312255859375, + "learning_rate": 3.46e-06, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.003, + "grad_norm": 0.57524174451828, + "learning_rate": 3.45e-06, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.003, + "grad_norm": 0.546630322933197, + "learning_rate": 3.44e-06, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0028, + "grad_norm": 0.5239407420158386, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0395, + "grad_norm": 0.8654681444168091, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.0399, + "grad_norm": 0.9791849851608276, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0714, + "grad_norm": 1.4680542945861816, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0029, + "grad_norm": 0.5489619970321655, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0652, + "grad_norm": 1.445259690284729, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.0031, + "grad_norm": 0.554716944694519, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0655, + "grad_norm": 1.0966905355453491, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0494, + "grad_norm": 1.049824833869934, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0591, + "grad_norm": 1.8449171781539917, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.003, + "grad_norm": 0.5422641634941101, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.0805, + "grad_norm": 1.8794130086898804, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.0481, + "grad_norm": 0.9934747219085693, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0497, + "grad_norm": 1.2348871231079102, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0444, + "grad_norm": 1.1614453792572021, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0388, + "grad_norm": 1.22681725025177, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0032, + "grad_norm": 0.5757941603660583, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0034, + "grad_norm": 0.611791729927063, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0616, + "grad_norm": 1.136299967765808, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0433, + "grad_norm": 1.2018715143203735, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.042, + "grad_norm": 1.0409917831420898, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.044, + "grad_norm": 1.2323369979858398, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0034, + "grad_norm": 0.6153194904327393, + "learning_rate": 3.21e-06, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0034, + "grad_norm": 0.6106674671173096, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.0639, + "grad_norm": 1.089705467224121, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0692, + "grad_norm": 1.5026510953903198, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0637, + "grad_norm": 1.383870005607605, + "learning_rate": 3.17e-06, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0032, + "grad_norm": 0.568756639957428, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.0413, + "grad_norm": 1.2440272569656372, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.039, + "grad_norm": 1.180145025253296, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265860795974731, + "learning_rate": 3.13e-06, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0033, + "grad_norm": 0.5880522727966309, + "learning_rate": 3.12e-06, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0032, + "grad_norm": 0.5984041690826416, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0557, + "grad_norm": 1.0321638584136963, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0585, + "grad_norm": 1.1382465362548828, + "learning_rate": 3.09e-06, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0032, + "grad_norm": 0.5756648778915405, + "learning_rate": 3.08e-06, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.003, + "grad_norm": 0.5428857207298279, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0774, + "grad_norm": 1.805572271347046, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0569, + "grad_norm": 1.139460563659668, + "learning_rate": 3.05e-06, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.0426, + "grad_norm": 1.383743405342102, + "learning_rate": 3.04e-06, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0024, + "grad_norm": 0.4358248710632324, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0397, + "grad_norm": 1.0429037809371948, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0457, + "grad_norm": 1.3951339721679688, + "learning_rate": 3.01e-06, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0027, + "grad_norm": 0.47018593549728394, + "learning_rate": 3e-06, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0731, + "grad_norm": 1.9685642719268799, + "learning_rate": 2.99e-06, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.0026, + "grad_norm": 0.45238158106803894, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0024, + "grad_norm": 0.40610402822494507, + "learning_rate": 2.97e-06, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0525, + "grad_norm": 1.0180531740188599, + "learning_rate": 2.96e-06, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0436, + "grad_norm": 1.2175544500350952, + "learning_rate": 2.95e-06, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.0601, + "grad_norm": 1.2007901668548584, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0566, + "grad_norm": 1.2265726327896118, + "learning_rate": 2.93e-06, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0556, + "grad_norm": 1.1947659254074097, + "learning_rate": 2.92e-06, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0027, + "grad_norm": 0.464779794216156, + "learning_rate": 2.91e-06, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.0026, + "grad_norm": 0.4438534080982208, + "learning_rate": 2.9e-06, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0593, + "grad_norm": 1.0972975492477417, + "learning_rate": 2.89e-06, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0835, + "grad_norm": 1.884253978729248, + "learning_rate": 2.88e-06, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0633, + "grad_norm": 1.0084459781646729, + "learning_rate": 2.87e-06, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0558, + "grad_norm": 1.0302374362945557, + "learning_rate": 2.86e-06, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0542, + "grad_norm": 0.9511706829071045, + "learning_rate": 2.85e-06, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0506, + "grad_norm": 1.4875551462173462, + "learning_rate": 2.84e-06, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0596, + "grad_norm": 1.1406636238098145, + "learning_rate": 2.83e-06, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0843, + "grad_norm": 1.663854718208313, + "learning_rate": 2.82e-06, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.003, + "grad_norm": 0.5147997140884399, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0862, + "grad_norm": 1.6565779447555542, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0031, + "grad_norm": 0.5479184985160828, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0444, + "grad_norm": 1.354533076286316, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0031, + "grad_norm": 0.5383754968643188, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0405, + "grad_norm": 1.1847655773162842, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0686, + "grad_norm": 1.8093054294586182, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0599, + "grad_norm": 0.9621073603630066, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.6532343626022339, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.062, + "grad_norm": 1.1963555812835693, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0471, + "grad_norm": 1.2936190366744995, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0039, + "grad_norm": 0.6896610856056213, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.0035, + "grad_norm": 0.619045615196228, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.0037, + "grad_norm": 0.6495220065116882, + "learning_rate": 2.68e-06, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850738286972046, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0394, + "grad_norm": 1.1021217107772827, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.003, + "grad_norm": 0.5251200795173645, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5125622153282166, + "learning_rate": 2.64e-06, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0829, + "grad_norm": 1.8204774856567383, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.0624, + "grad_norm": 1.3469654321670532, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0587, + "grad_norm": 1.1263304948806763, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0791, + "grad_norm": 2.308769941329956, + "learning_rate": 2.6e-06, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0025, + "grad_norm": 0.42390695214271545, + "learning_rate": 2.59e-06, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0025, + "grad_norm": 0.4351828694343567, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0025, + "grad_norm": 0.45117858052253723, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.002, + "grad_norm": 0.3449709117412567, + "learning_rate": 2.56e-06, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0552, + "grad_norm": 1.02012038230896, + "learning_rate": 2.55e-06, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0021, + "grad_norm": 0.35598093271255493, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0706, + "grad_norm": 1.9882680177688599, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0585, + "grad_norm": 1.1153826713562012, + "learning_rate": 2.52e-06, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0606, + "grad_norm": 1.6919127702713013, + "learning_rate": 2.51e-06, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.0381, + "grad_norm": 0.9558757543563843, + "learning_rate": 2.5e-06, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0021, + "grad_norm": 0.3558536469936371, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0522, + "grad_norm": 1.5039445161819458, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0762, + "grad_norm": 1.8451253175735474, + "learning_rate": 2.47e-06, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0021, + "grad_norm": 0.3580801486968994, + "learning_rate": 2.46e-06, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0596, + "grad_norm": 1.0082149505615234, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0019, + "grad_norm": 0.31669387221336365, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0021, + "grad_norm": 0.3432970345020294, + "learning_rate": 2.43e-06, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0574, + "grad_norm": 1.3162227869033813, + "learning_rate": 2.42e-06, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0435, + "grad_norm": 1.0670703649520874, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0461, + "grad_norm": 1.2668665647506714, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0594, + "grad_norm": 1.4527745246887207, + "learning_rate": 2.39e-06, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.002, + "grad_norm": 0.3514978885650635, + "learning_rate": 2.38e-06, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0729, + "grad_norm": 2.0161454677581787, + "learning_rate": 2.37e-06, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.38664510846138, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0353, + "grad_norm": 0.9888522624969482, + "learning_rate": 2.35e-06, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0816, + "grad_norm": 1.6845252513885498, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.002, + "grad_norm": 0.34472399950027466, + "learning_rate": 2.33e-06, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0612, + "grad_norm": 1.5795350074768066, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.036, + "grad_norm": 1.0923341512680054, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0021, + "grad_norm": 0.36445900797843933, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0021, + "grad_norm": 0.36632096767425537, + "learning_rate": 2.29e-06, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0024, + "grad_norm": 0.4193936884403229, + "learning_rate": 2.28e-06, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0021, + "grad_norm": 0.36693835258483887, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0695, + "grad_norm": 1.6587837934494019, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0439, + "grad_norm": 1.2197368144989014, + "learning_rate": 2.25e-06, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.0737, + "grad_norm": 1.8300983905792236, + "learning_rate": 2.24e-06, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0443, + "grad_norm": 1.1544647216796875, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0023, + "grad_norm": 0.40331411361694336, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.0024, + "grad_norm": 0.4283469021320343, + "learning_rate": 2.21e-06, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0023, + "grad_norm": 0.38760119676589966, + "learning_rate": 2.2e-06, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0768, + "grad_norm": 2.4320685863494873, + "learning_rate": 2.19e-06, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.0022, + "grad_norm": 0.3753429353237152, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.0022, + "grad_norm": 0.37054023146629333, + "learning_rate": 2.17e-06, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.063, + "grad_norm": 1.1455004215240479, + "learning_rate": 2.16e-06, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.002, + "grad_norm": 0.3473651707172394, + "learning_rate": 2.15e-06, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0613, + "grad_norm": 1.3616305589675903, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0728, + "grad_norm": 1.4589122533798218, + "learning_rate": 2.13e-06, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0021, + "grad_norm": 0.3479214906692505, + "learning_rate": 2.12e-06, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0652, + "grad_norm": 1.3161977529525757, + "learning_rate": 2.11e-06, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0019, + "grad_norm": 0.30886292457580566, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0592, + "grad_norm": 1.1527003049850464, + "learning_rate": 2.09e-06, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0019, + "grad_norm": 0.32701927423477173, + "learning_rate": 2.08e-06, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0019, + "grad_norm": 0.31851011514663696, + "learning_rate": 2.07e-06, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0019, + "grad_norm": 0.3128160238265991, + "learning_rate": 2.06e-06, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0609, + "grad_norm": 1.4082930088043213, + "learning_rate": 2.05e-06, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0367, + "grad_norm": 1.014041781425476, + "learning_rate": 2.04e-06, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0018, + "grad_norm": 0.31275689601898193, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0651, + "grad_norm": 1.7855079174041748, + "learning_rate": 2.02e-06, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0019, + "grad_norm": 0.3344590663909912, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0647, + "grad_norm": 1.4787598848342896, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0578, + "grad_norm": 1.2822742462158203, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0431, + "grad_norm": 1.270432472229004, + "learning_rate": 1.98e-06, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0629, + "grad_norm": 1.4008212089538574, + "learning_rate": 1.97e-06, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.29254984855651855, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.33816665410995483, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0407, + "grad_norm": 1.2000517845153809, + "learning_rate": 1.94e-06, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0021, + "grad_norm": 0.36089253425598145, + "learning_rate": 1.93e-06, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009200990200043, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0681, + "grad_norm": 1.279045581817627, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.041, + "grad_norm": 0.9949601292610168, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0436, + "grad_norm": 1.0469834804534912, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.07, + "grad_norm": 1.9559322595596313, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.002, + "grad_norm": 0.34342578053474426, + "learning_rate": 1.87e-06, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.0878, + "grad_norm": 1.9412786960601807, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.002, + "grad_norm": 0.32897070050239563, + "learning_rate": 1.85e-06, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0558, + "grad_norm": 1.230363368988037, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0021, + "grad_norm": 0.36400625109672546, + "learning_rate": 1.83e-06, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0836, + "grad_norm": 2.0716917514801025, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0621, + "grad_norm": 1.304250717163086, + "learning_rate": 1.81e-06, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0021, + "grad_norm": 0.36326804757118225, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0021, + "grad_norm": 0.35329553484916687, + "learning_rate": 1.79e-06, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.37259048223495483, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0427, + "grad_norm": 1.4227620363235474, + "learning_rate": 1.77e-06, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.0019, + "grad_norm": 0.3209492564201355, + "learning_rate": 1.76e-06, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.0461, + "grad_norm": 1.0381195545196533, + "learning_rate": 1.75e-06, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.042, + "grad_norm": 1.2007672786712646, + "learning_rate": 1.74e-06, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0021, + "grad_norm": 0.36294040083885193, + "learning_rate": 1.73e-06, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0021, + "grad_norm": 0.36834561824798584, + "learning_rate": 1.72e-06, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0571, + "grad_norm": 1.3143699169158936, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0019, + "grad_norm": 0.3313964307308197, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.002, + "grad_norm": 0.357883095741272, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3507683277130127, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0019, + "grad_norm": 0.32915839552879333, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.055, + "grad_norm": 1.478965163230896, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0563, + "grad_norm": 1.0098392963409424, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0018, + "grad_norm": 0.30924662947654724, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0662, + "grad_norm": 1.276971459388733, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0018, + "grad_norm": 0.3022649586200714, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0019, + "grad_norm": 0.32340654730796814, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.038, + "grad_norm": 1.0054205656051636, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.0445, + "grad_norm": 1.2428219318389893, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0531, + "grad_norm": 1.1613452434539795, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0018, + "grad_norm": 0.2842133641242981, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0018, + "grad_norm": 0.3061327040195465, + "learning_rate": 1.56e-06, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0019, + "grad_norm": 0.31931373476982117, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0689, + "grad_norm": 1.777726650238037, + "learning_rate": 1.54e-06, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0626, + "grad_norm": 1.0839914083480835, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0621, + "grad_norm": 1.0777654647827148, + "learning_rate": 1.52e-06, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0617, + "grad_norm": 1.3572564125061035, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0019, + "grad_norm": 0.31615281105041504, + "learning_rate": 1.5e-06, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0584, + "grad_norm": 1.4089421033859253, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0376, + "grad_norm": 0.9989500641822815, + "learning_rate": 1.48e-06, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0564, + "grad_norm": 1.4619941711425781, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0017, + "grad_norm": 0.27881649136543274, + "learning_rate": 1.46e-06, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.0021, + "grad_norm": 0.3606109619140625, + "learning_rate": 1.45e-06, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0018, + "grad_norm": 0.3089398145675659, + "learning_rate": 1.44e-06, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.002, + "grad_norm": 0.35239994525909424, + "learning_rate": 1.43e-06, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.0434, + "grad_norm": 1.028780460357666, + "learning_rate": 1.42e-06, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.055, + "grad_norm": 1.3252202272415161, + "learning_rate": 1.41e-06, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.002, + "grad_norm": 0.34616848826408386, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0021, + "grad_norm": 0.345546156167984, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.041, + "grad_norm": 1.0742279291152954, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.0558, + "grad_norm": 1.3981537818908691, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.3480032682418823, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0414, + "grad_norm": 1.1904889345169067, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0019, + "grad_norm": 0.32626014947891235, + "learning_rate": 1.34e-06, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.0019, + "grad_norm": 0.3311507999897003, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.0417, + "grad_norm": 1.0487819910049438, + "learning_rate": 1.32e-06, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0612, + "grad_norm": 1.482262372970581, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0597, + "grad_norm": 1.0906400680541992, + "learning_rate": 1.3e-06, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0451, + "grad_norm": 1.3021650314331055, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0566, + "grad_norm": 1.1073824167251587, + "learning_rate": 1.28e-06, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0021, + "grad_norm": 0.366703599691391, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0402, + "grad_norm": 1.114858865737915, + "learning_rate": 1.26e-06, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0726, + "grad_norm": 1.9793658256530762, + "learning_rate": 1.25e-06, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0393, + "grad_norm": 1.212233066558838, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.002, + "grad_norm": 0.3448551893234253, + "learning_rate": 1.23e-06, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.002, + "grad_norm": 0.33576035499572754, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0662, + "grad_norm": 1.6050575971603394, + "learning_rate": 1.21e-06, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0361, + "grad_norm": 1.034451961517334, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0022, + "grad_norm": 0.3761736750602722, + "learning_rate": 1.19e-06, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0648, + "grad_norm": 1.8947163820266724, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0556, + "grad_norm": 1.317289113998413, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0441, + "grad_norm": 1.1064449548721313, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0895, + "grad_norm": 1.8790072202682495, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0824, + "grad_norm": 2.2661681175231934, + "learning_rate": 1.14e-06, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.08, + "grad_norm": 2.5085411071777344, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0382, + "grad_norm": 0.8821580410003662, + "learning_rate": 1.12e-06, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.0419, + "grad_norm": 1.2789467573165894, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0661, + "grad_norm": 1.2416129112243652, + "learning_rate": 1.1e-06, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0385, + "grad_norm": 1.19954514503479, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0803, + "grad_norm": 1.7022594213485718, + "learning_rate": 1.08e-06, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0651, + "grad_norm": 1.4528557062149048, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0647, + "grad_norm": 1.2057602405548096, + "learning_rate": 1.06e-06, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0609, + "grad_norm": 1.2766141891479492, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0437, + "grad_norm": 1.1985217332839966, + "learning_rate": 1.04e-06, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0571, + "grad_norm": 1.1973105669021606, + "learning_rate": 1.03e-06, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0664, + "grad_norm": 1.5751904249191284, + "learning_rate": 1.02e-06, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0436, + "grad_norm": 1.0939377546310425, + "learning_rate": 1.01e-06, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0031, + "grad_norm": 0.5472993850708008, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 650257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0595, + "grad_norm": 1.3305593729019165, + "learning_rate": 9.9e-07, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0391, + "grad_norm": 1.123191475868225, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0032, + "grad_norm": 0.5546753406524658, + "learning_rate": 9.7e-07, + "num_tokens": 651372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0031, + "grad_norm": 0.5491161942481995, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 651463.0, + "mean_token_accuracy": 1.0, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.0687, + "grad_norm": 2.234290599822998, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0586, + "grad_norm": 1.2323557138442993, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0557, + "grad_norm": 1.1316601037979126, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.0399, + "grad_norm": 1.354643702507019, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0032, + "grad_norm": 0.5774580836296082, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.2131, + "grad_norm": 5.501800537109375, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0552, + "grad_norm": 1.1691670417785645, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0571, + "grad_norm": 1.3334885835647583, + "learning_rate": 8.8e-07, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850784778594971, + "learning_rate": 8.7e-07, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0751, + "grad_norm": 2.8085896968841553, + "learning_rate": 8.6e-07, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0805, + "grad_norm": 1.9259722232818604, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0404, + "grad_norm": 1.23832106590271, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0566, + "grad_norm": 1.0702412128448486, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0608, + "grad_norm": 1.4386783838272095, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0592, + "grad_norm": 1.2550030946731567, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0434, + "grad_norm": 1.8757680654525757, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.2038, + "grad_norm": 4.9877095222473145, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0037, + "grad_norm": 0.6778392791748047, + "learning_rate": 7.8e-07, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.048, + "grad_norm": 1.6256376504898071, + "learning_rate": 7.7e-07, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.0561, + "grad_norm": 1.4658511877059937, + "learning_rate": 7.6e-07, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.071, + "grad_norm": 1.7589434385299683, + "learning_rate": 7.5e-07, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.0403, + "grad_norm": 1.2130093574523926, + "learning_rate": 7.4e-07, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599217891693115, + "learning_rate": 7.3e-07, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0584, + "grad_norm": 1.2125273942947388, + "learning_rate": 7.2e-07, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0039, + "grad_norm": 0.6885141730308533, + "learning_rate": 7.1e-07, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.056, + "grad_norm": 1.233972430229187, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.004, + "grad_norm": 0.7142868041992188, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0614, + "grad_norm": 1.4658222198486328, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0493, + "grad_norm": 1.051007866859436, + "learning_rate": 6.7e-07, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0409, + "grad_norm": 1.2317217588424683, + "learning_rate": 6.6e-07, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.004, + "grad_norm": 0.7169041633605957, + "learning_rate": 6.5e-07, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0393, + "grad_norm": 1.290911316871643, + "learning_rate": 6.4e-07, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.043, + "grad_norm": 1.550564169883728, + "learning_rate": 6.3e-07, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.044, + "grad_norm": 1.1559568643569946, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0761, + "grad_norm": 1.5238863229751587, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0479, + "grad_norm": 1.310771107673645, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0463, + "grad_norm": 1.120958924293518, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.0039, + "grad_norm": 0.6784827709197998, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0672, + "grad_norm": 1.386460542678833, + "learning_rate": 5.7e-07, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0453, + "grad_norm": 1.2751063108444214, + "learning_rate": 5.6e-07, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.062, + "grad_norm": 1.0763590335845947, + "learning_rate": 5.5e-07, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0571, + "grad_norm": 1.2678844928741455, + "learning_rate": 5.4e-07, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.004, + "grad_norm": 0.7198203802108765, + "learning_rate": 5.3e-07, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0442, + "grad_norm": 1.2891501188278198, + "learning_rate": 5.2e-07, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0039, + "grad_norm": 0.6999010443687439, + "learning_rate": 5.1e-07, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.004, + "grad_norm": 0.7249695658683777, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0781, + "grad_norm": 1.6599754095077515, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0038, + "grad_norm": 0.6885353922843933, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0568, + "grad_norm": 1.6591845750808716, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0038, + "grad_norm": 0.6629458069801331, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0553, + "grad_norm": 1.0831410884857178, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.065, + "grad_norm": 1.709847331047058, + "learning_rate": 4.4e-07, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0446, + "grad_norm": 1.2094167470932007, + "learning_rate": 4.3e-07, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0585, + "grad_norm": 1.23978853225708, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0039, + "grad_norm": 0.6842091083526611, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0603, + "grad_norm": 1.337598204612732, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.004, + "grad_norm": 0.7296668291091919, + "learning_rate": 3.9e-07, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0038, + "grad_norm": 0.6806443333625793, + "learning_rate": 3.8e-07, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0038, + "grad_norm": 0.6828562021255493, + "learning_rate": 3.7e-07, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0667, + "grad_norm": 1.748108148574829, + "learning_rate": 3.6e-07, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0386, + "grad_norm": 1.3246146440505981, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0038, + "grad_norm": 0.6706036329269409, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0552, + "grad_norm": 1.2772272825241089, + "learning_rate": 3.3e-07, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0596, + "grad_norm": 1.3164302110671997, + "learning_rate": 3.2e-07, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0582, + "grad_norm": 1.3520668745040894, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0547, + "grad_norm": 1.2490239143371582, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0387, + "grad_norm": 1.1652135848999023, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0626, + "grad_norm": 1.9845855236053467, + "learning_rate": 2.8e-07, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0038, + "grad_norm": 0.6789660453796387, + "learning_rate": 2.7e-07, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.0037, + "grad_norm": 0.678180456161499, + "learning_rate": 2.6e-07, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0038, + "grad_norm": 0.6906817555427551, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0516, + "grad_norm": 1.1001511812210083, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0037, + "grad_norm": 0.6647882461547852, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.0627, + "grad_norm": 1.4906483888626099, + "learning_rate": 2.2e-07, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0653, + "grad_norm": 1.6483995914459229, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0542, + "grad_norm": 1.1732497215270996, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0035, + "grad_norm": 0.6123244762420654, + "learning_rate": 1.9e-07, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0628, + "grad_norm": 3.3254270553588867, + "learning_rate": 1.8e-07, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.0409, + "grad_norm": 1.0730781555175781, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0034, + "grad_norm": 0.5923974514007568, + "learning_rate": 1.6e-07, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.033, + "grad_norm": 1.07072114944458, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0563, + "grad_norm": 1.1191027164459229, + "learning_rate": 1.4e-07, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0034, + "grad_norm": 0.6199093461036682, + "learning_rate": 1.3e-07, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0497, + "grad_norm": 1.2205955982208252, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0553, + "grad_norm": 1.2247557640075684, + "learning_rate": 1.1e-07, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.0615, + "grad_norm": 1.5119178295135498, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0036, + "grad_norm": 0.6369652152061462, + "learning_rate": 9e-08, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0409, + "grad_norm": 1.2765092849731445, + "learning_rate": 8e-08, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0446, + "grad_norm": 1.0794225931167603, + "learning_rate": 7e-08, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0037, + "grad_norm": 0.6602066159248352, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0637, + "grad_norm": 1.4354852437973022, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.0037, + "grad_norm": 0.6749649047851562, + "learning_rate": 4e-08, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518, + "epoch": 1.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..cf39b39eacfc4a0eb4375b757c1d2cdd829d1bbd --- /dev/null +++ b/docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 372.1845, + "train_loss": 0.18184852770145518, + "train_metrics": { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/active_model_manifest.json b/docs/results/active_model_manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..45ae2bb95cb0f8b13972ee9ee8efe58819b86713 --- /dev/null +++ b/docs/results/active_model_manifest.json @@ -0,0 +1,68 @@ +{ + "status": "ok", + "enabled": true, + "activated_at_utc": "2026-04-26T02:24:15.464507+00:00", + "run_id": "qwen-qwen2-5-0-5b-instruct", + "source": "top-level", + "label": "local-qwen-0.5b-active-smoke", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "base_model": "Qwen/Qwen2.5-0.5B-Instruct", + "preferred_artifact": "grpo_adapter", + "mode": "symlink", + "source_checkpoint_dir": "checkpoints", + "source_report_dir": "outputs/reports", + "grpo_adapter": "checkpoints/active/grpo_adapter", + "merged_model": "checkpoints/active/merged", + "sft_adapter": "checkpoints/active/sft_adapter", + "availability": { + "grpo_adapter": true, + "merged": true, + "sft_adapter": true + }, + "reports": { + "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json", + "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json", + "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json", + "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json", + "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json", + "plot_index.json": "outputs/reports/active_model/plot_index.json", + "dose_train.json": "outputs/reports/active_model/dose_train.json", + "baselines.json": "outputs/reports/active_model/baselines.json", + "robustness.json": "outputs/reports/active_model/robustness.json", + "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json", + "sft_run.json": "outputs/reports/active_model/sft_run.json", + "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt", + "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json", + "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json", + "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json", + "improvement_report.json": "outputs/reports/active_model/improvement_report.json", + "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json", + "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json", + "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json", + "risk_train.json": "outputs/reports/active_model/risk_train.json", + "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json", + "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json", + "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json", + "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json", + "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json", + "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json", + "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json", + "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json", + "graph_train.json": "outputs/reports/active_model/graph_train.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json", + "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json" + }, + "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available." +} \ No newline at end of file diff --git a/docs/results/anti_cheat_failure_rates.png b/docs/results/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..c1a45c7419347596de81beac5035d365784ad3f3 Binary files /dev/null and b/docs/results/anti_cheat_failure_rates.png differ diff --git a/docs/results/anti_hacking_overfit_report.json b/docs/results/anti_hacking_overfit_report.json new file mode 100644 index 0000000000000000000000000000000000000000..a66e9ebd9c9ca782e054ccff61d6a7c6c58fdf4a --- /dev/null +++ b/docs/results/anti_hacking_overfit_report.json @@ -0,0 +1,22 @@ +{ + "passed": false, + "training_mode": "full", + "warnings": [ + "Qwen2.5-3B:high_exploit_rate" + ], + "completed_models": [ + "Qwen/Qwen2.5-3B-Instruct" + ], + "failed_or_skipped_models": [], + "checks": { + "reward_bounds": [ + 0.001, + 0.999 + ], + "reward_precision": 3, + "fallback_backends_rejected": true, + "exploit_rate_threshold": 0.35, + "train_holdout_gap_threshold": 0.25, + "min_validity_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/avg_process_fidelity.png b/docs/results/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..ef562e57a51bcaaec4664c89eb4d2c0c439e6231 Binary files /dev/null and b/docs/results/avg_process_fidelity.png differ diff --git a/docs/results/avg_reward.png b/docs/results/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..edb2fa8c25074d88c90bce5c243af90dcb28e1c6 Binary files /dev/null and b/docs/results/avg_reward.png differ diff --git a/docs/results/baselines.json b/docs/results/baselines.json new file mode 100644 index 0000000000000000000000000000000000000000..3a4790a06cc3a416ca49989ffc2a3a7c54434d9e --- /dev/null +++ b/docs/results/baselines.json @@ -0,0 +1,119 @@ +{ + "no_change": { + "mode": "REGIMEN_OPT", + "action_type": "KEEP_REGIMEN", + "target_drug": null, + "replacement_drug": null, + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_01", + "confidence": 0.8, + "rationale_brief": "Baseline no-change policy." + }, + "rules_only": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.75, + "rationale_brief": "Rules-only selected top legal candidate." + }, + "greedy": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.72, + "rationale_brief": "Greedy safety/burden improvement baseline." + }, + "contextual_bandit": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.68, + "rationale_brief": "Contextual bandit selected candidate." + }, + "contextual_bandit_topk": [ + { + "candidate_id": "cand_09", + "score": 1.1532307878304324, + "exploration_bonus": 1.1532307878304324, + "algorithm": "linucb" + }, + { + "candidate_id": "cand_10", + "score": 1.1489735636645433, + "exploration_bonus": 1.1489735636645433, + "algorithm": "linucb" + }, + { + "candidate_id": "cand_08", + "score": 1.1447401451857973, + "exploration_bonus": 1.1447401451857973, + "algorithm": "linucb" + } + ], + "beam_search": { + "mode": "REGIMEN_OPT", + "action_type": "SUBSTITUTE_WITHIN_CLASS", + "target_drug": "opioid_like", + "replacement_drug": "non_opioid_analgesic", + "dose_bucket": "NA", + "taper_days": null, + "monitoring_plan": null, + "evidence_query": null, + "new_drug_name": null, + "candidate_components": [], + "candidate_id": "cand_04", + "confidence": 0.74, + "rationale_brief": "Beam-search(3) top candidate." + }, + "baseline_policy": "no_change_candidate", + "episodes": 8, + "avg_reward": 0.747, + "legality_rate": 1.0, + "success_rate": 0.0, + "policy_stack_ablations": { + "bandit-only": { + "avg_reward": 0.7616666666666667, + "legality_rate": 1.0, + "steps": 3.0 + }, + "llm-only": { + "avg_reward": 0.7753333333333333, + "legality_rate": 1.0, + "steps": 3.0 + }, + "llm+bandit": { + "avg_reward": 0.7753333333333333, + "legality_rate": 1.0, + "steps": 3.0 + } + } +} \ No newline at end of file diff --git a/docs/results/benchmark_report.json b/docs/results/benchmark_report.json new file mode 100644 index 0000000000000000000000000000000000000000..8efc286c219c65f5df0f61195a6fb9cbc0e14ada --- /dev/null +++ b/docs/results/benchmark_report.json @@ -0,0 +1,52 @@ +{ + "offline_policy_eval": { + "avg_reward": 0.772833, + "legal_rate": 1.0, + "success_rate": 0.0 + }, + "safety_eval": { + "severe_violation_rate": 0.0, + "illegal_step_rate": 0.0 + }, + "dosing_eval": { + "target_attainment": 0.75, + "toxicity_avoidance": 1.0 + }, + "robustness_eval": { + "missing_labs_safety_rate": 0.666667, + "noisy_dose_info_safety_rate": 1.0, + "conflicting_meds_safety_rate": 1.0, + "alias_noise_safety_rate": 1.0, + "hidden_duplicate_detection_rate": 1.0, + "wrong_candidate_id_resilience": 1.0, + "stale_evidence_safety_rate": 1.0, + "delayed_ade_manifestation_safety_rate": 1.0 + }, + "calibration_eval": { + "ece_proxy": 0.08625 + }, + "abstention_eval": { + "appropriate_abstention_rate": 0.0 + }, + "process_eval": { + "process_fidelity": 0.92, + "avg_invalid_actions": 0.333333 + }, + "subgroup_eval": { + "renal_compromise": { + "avg_reward": 0.774, + "legal_rate": 1.0 + }, + "hepatic_compromise": { + "avg_reward": 0.779333, + "legal_rate": 1.0 + }, + "frail": { + "avg_reward": 0.781667, + "legal_rate": 1.0 + } + }, + "explainability_eval": { + "grounding_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/benchmark_report.txt b/docs/results/benchmark_report.txt new file mode 100644 index 0000000000000000000000000000000000000000..8efc286c219c65f5df0f61195a6fb9cbc0e14ada --- /dev/null +++ b/docs/results/benchmark_report.txt @@ -0,0 +1,52 @@ +{ + "offline_policy_eval": { + "avg_reward": 0.772833, + "legal_rate": 1.0, + "success_rate": 0.0 + }, + "safety_eval": { + "severe_violation_rate": 0.0, + "illegal_step_rate": 0.0 + }, + "dosing_eval": { + "target_attainment": 0.75, + "toxicity_avoidance": 1.0 + }, + "robustness_eval": { + "missing_labs_safety_rate": 0.666667, + "noisy_dose_info_safety_rate": 1.0, + "conflicting_meds_safety_rate": 1.0, + "alias_noise_safety_rate": 1.0, + "hidden_duplicate_detection_rate": 1.0, + "wrong_candidate_id_resilience": 1.0, + "stale_evidence_safety_rate": 1.0, + "delayed_ade_manifestation_safety_rate": 1.0 + }, + "calibration_eval": { + "ece_proxy": 0.08625 + }, + "abstention_eval": { + "appropriate_abstention_rate": 0.0 + }, + "process_eval": { + "process_fidelity": 0.92, + "avg_invalid_actions": 0.333333 + }, + "subgroup_eval": { + "renal_compromise": { + "avg_reward": 0.774, + "legal_rate": 1.0 + }, + "hepatic_compromise": { + "avg_reward": 0.779333, + "legal_rate": 1.0 + }, + "frail": { + "avg_reward": 0.781667, + "legal_rate": 1.0 + } + }, + "explainability_eval": { + "grounding_rate": 0.8 + } +} \ No newline at end of file diff --git a/docs/results/dose_train.json b/docs/results/dose_train.json new file mode 100644 index 0000000000000000000000000000000000000000..3bb2d9dd4c8a3461d87923edf631ecf3a22b5f33 --- /dev/null +++ b/docs/results/dose_train.json @@ -0,0 +1,6 @@ +{ + "dataset_size": 120.0, + "status": "trained", + "train_mae": 0.0025, + "model_path": "outputs/models/dose_model.pkl" +} \ No newline at end of file diff --git a/docs/results/dosing_grpo.json b/docs/results/dosing_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..1752bc84f741b6e0066175069bd885fb048fde2f --- /dev/null +++ b/docs/results/dosing_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.7785555555555557, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.0, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9200000000000002, + "exploit_detection_count": 3.0, + "reward_columns": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000001, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.56, + "efficiency_score": 0.77, + "process_fidelity_score": 0.9200000000000002, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.6663333333333333, + "uncertainty_calibration_score": 0.87 + } +} \ No newline at end of file diff --git a/docs/results/frontier_ready.json b/docs/results/frontier_ready.json new file mode 100644 index 0000000000000000000000000000000000000000..ef8f952db5fc8453c14dad5091bc9c1e33625f49 --- /dev/null +++ b/docs/results/frontier_ready.json @@ -0,0 +1,8 @@ +{ + "frontier_models": [ + "qwen2.5:7b-instruct", + "qwen2.5:14b-instruct" + ], + "deployment_mode": "hf_or_vllm_ready", + "notes": "Baseline complete; ready for larger model sweep." +} \ No newline at end of file diff --git a/docs/results/graph_train.json b/docs/results/graph_train.json new file mode 100644 index 0000000000000000000000000000000000000000..91955cfb1a71b04e168b21920c3911df0f36df4a --- /dev/null +++ b/docs/results/graph_train.json @@ -0,0 +1,5 @@ +{ + "num_samples": 180, + "status": "trained", + "model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl" +} \ No newline at end of file diff --git a/docs/results/grpo_ablation_report.json b/docs/results/grpo_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..89d5d32978be7e468119b45142923322586f281c --- /dev/null +++ b/docs/results/grpo_ablation_report.json @@ -0,0 +1,149 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + } +} \ No newline at end of file diff --git a/docs/results/grpo_reward_curves.png b/docs/results/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..9031adc2f899c7277a1cd3322fee213c2d06eded --- /dev/null +++ b/docs/results/grpo_reward_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193950a030afd8642db1cfe245bb47e89f88461f5a9682fede7228058253511b +size 147353 diff --git a/docs/results/grpo_training_cycle/avg_process_fidelity.png b/docs/results/grpo_training_cycle/avg_process_fidelity.png new file mode 100644 index 0000000000000000000000000000000000000000..ef562e57a51bcaaec4664c89eb4d2c0c439e6231 Binary files /dev/null and b/docs/results/grpo_training_cycle/avg_process_fidelity.png differ diff --git a/docs/results/grpo_training_cycle/avg_reward.png b/docs/results/grpo_training_cycle/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..edb2fa8c25074d88c90bce5c243af90dcb28e1c6 Binary files /dev/null and b/docs/results/grpo_training_cycle/avg_reward.png differ diff --git a/docs/results/grpo_training_cycle/grpo_trl_run.json b/docs/results/grpo_training_cycle/grpo_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..1c242f4589a311ae34d0448039293b45b8d911e1 --- /dev/null +++ b/docs/results/grpo_training_cycle/grpo_trl_run.json @@ -0,0 +1,42 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "records": 2000, + "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 4000, + "avg_reward": 0.782178, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.985277, + "safety_delta_score": 0.496104, + "burden_improvement_score": 0.494346, + "disease_stability_score": 0.8912, + "dosing_quality_score": 0.511938, + "abstention_quality_score": 0.56, + "efficiency_score": 0.84942, + "process_fidelity_score": 0.905268, + "explanation_grounding_score": 0.800248, + "anti_cheat_score": 0.48004, + "uncertainty_calibration_score": 0.730195 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.798661, + "clinical_improvement": 0.62689, + "dosing_quality": 0.535969, + "process_integrity": 0.888448 + } + }, + "reward_log": "/app/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "train_runtime": 6960.8084, + "train_samples_per_second": 0.287, + "train_steps_per_second": 0.287, + "total_flos": 0.0, + "train_loss": 2.3633859725151752e-06 + }, + "artifact_path": "/app/checkpoints/grpo_adapter", + "unsloth_available": false +} \ No newline at end of file diff --git a/docs/results/grpo_training_cycle/hf_training_status.json b/docs/results/grpo_training_cycle/hf_training_status.json new file mode 100644 index 0000000000000000000000000000000000000000..0822dcb1b0bdbad63e954a12d2b4bb7c157bc7b4 --- /dev/null +++ b/docs/results/grpo_training_cycle/hf_training_status.json @@ -0,0 +1,123 @@ +{ + "status": "running", + "started_at": 1777161126.3536248, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.821 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 4.367 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--epochs", + "1", + "--max-steps", + "20", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 24.564 + }, + { + "args": [ + "reuse_artifact", + "grpo_adapter", + "/app/checkpoints/grpo_adapter" + ], + "returncode": 0, + "elapsed_seconds": 0.0 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sft_adapter", + "--output-dir", + "checkpoints/merged" + ], + "returncode": 0, + "elapsed_seconds": 9.014 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "3", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct" + ], + "returncode": 0, + "elapsed_seconds": 14.811 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8" + ], + "returncode": 0, + "elapsed_seconds": 4.458 + }, + { + "args": [ + "python", + "scripts/evaluate_baselines.py" + ], + "returncode": 0, + "elapsed_seconds": 4.603 + }, + { + "args": [ + "python", + "scripts/evaluate_all.py" + ], + "returncode": 0, + "elapsed_seconds": 4.271 + }, + { + "args": [ + "python", + "scripts/evaluate_compare_runs.py", + "--baseline", + "outputs/reports/baselines.json", + "--candidate", + "outputs/reports/benchmark_report.json", + "--output", + "outputs/reports/improvement_report.json" + ], + "returncode": 0, + "elapsed_seconds": 0.037 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-artifacts" +} \ No newline at end of file diff --git a/docs/results/grpo_training_cycle/legality_rate.png b/docs/results/grpo_training_cycle/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b4c1e418b0262902ad1c9ad4818f4d9b22a152d0 Binary files /dev/null and b/docs/results/grpo_training_cycle/legality_rate.png differ diff --git a/docs/results/grpo_training_cycle/policy_stack_avg_reward.png b/docs/results/grpo_training_cycle/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b28dc57ac180e83b38194b17251e3cf3a5a941da Binary files /dev/null and b/docs/results/grpo_training_cycle/policy_stack_avg_reward.png differ diff --git a/docs/results/grpo_training_cycle/success_rate.png b/docs/results/grpo_training_cycle/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b918ae36817cfb351bb924de05a638e1ee4c73c2 Binary files /dev/null and b/docs/results/grpo_training_cycle/success_rate.png differ diff --git a/docs/results/grpo_trl_run.json b/docs/results/grpo_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..87ca8fb39dcfbc92786e290045c1da201ca5d1df --- /dev/null +++ b/docs/results/grpo_trl_run.json @@ -0,0 +1,43 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "records": 2000, + "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 4000, + "avg_reward": 0.767, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.929, + "safety_delta_score": 0.497, + "burden_improvement_score": 0.469, + "disease_stability_score": 0.861, + "dosing_quality_score": 0.526, + "abstention_quality_score": 0.56, + "efficiency_score": 0.849, + "process_fidelity_score": 0.856, + "explanation_grounding_score": 0.795, + "anti_cheat_score": 0.589, + "uncertainty_calibration_score": 0.747 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.816, + "clinical_improvement": 0.609, + "dosing_quality": 0.543, + "process_integrity": 0.875 + } + }, + "reward_log": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl", + "train_metrics": { + "train_runtime": 6873.9375, + "train_samples_per_second": 0.291, + "train_steps_per_second": 0.291, + "total_flos": 0.0, + "train_loss": 2.665005830824185e-06 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter", + "unsloth_available": false +} \ No newline at end of file diff --git a/docs/results/grpo_trl_run_auto.json b/docs/results/grpo_trl_run_auto.json new file mode 100644 index 0000000000000000000000000000000000000000..6ee3447446fe2c94787048f5abecfd2186024ed2 --- /dev/null +++ b/docs/results/grpo_trl_run_auto.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 2, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 2, + "avg_reward": 0.798, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.671, + "burden_improvement_score": 0.525, + "disease_stability_score": 0.74, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.5, + "uncertainty_calibration_score": 0.74 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.8095, + "clinical_improvement": 0.645, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 2.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." +} \ No newline at end of file diff --git a/docs/results/grpo_trl_run_fallback_check.json b/docs/results/grpo_trl_run_fallback_check.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/grpo_trl_run_fallback_check.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/grpo_trl_run_smoke.json b/docs/results/grpo_trl_run_smoke.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/grpo_trl_run_smoke.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/grpo_trl_run_strict_check.json b/docs/results/grpo_trl_run_strict_check.json new file mode 100644 index 0000000000000000000000000000000000000000..e99d2da3538269276216240b8223f8102ea6ae86 --- /dev/null +++ b/docs/results/grpo_trl_run_strict_check.json @@ -0,0 +1,39 @@ +{ + "status": "fallback", + "backend": "env_reward_fallback", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "records": 1, + "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 1, + "avg_reward": 0.764, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.857, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.8, + "anti_cheat_score": 0.001, + "uncertainty_calibration_score": 0.7 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.675, + "clinical_improvement": 0.633, + "dosing_quality": 0.53, + "process_integrity": 0.894 + } + }, + "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", + "train_metrics": { + "steps_executed": 1.0 + }, + "artifact_path": "", + "unsloth_available": false, + "trl_runtime_error": "forced_fallback" +} \ No newline at end of file diff --git a/docs/results/hf_space_verification.json b/docs/results/hf_space_verification.json new file mode 100644 index 0000000000000000000000000000000000000000..63b40675a8e34e277d751c7e761515ffc7b67a19 --- /dev/null +++ b/docs/results/hf_space_verification.json @@ -0,0 +1,29 @@ +{ + "passed": true, + "status": "running", + "checked_on": "2026-04-26", + "repo_id": "TheJackBright/polyguard-openenv", + "space_url": "https://huggingface.co/spaces/TheJackBright/polyguard-openenv", + "runtime_url": "https://thejackbright-polyguard-openenv.hf.space", + "space_sha": "877add7878fbdf2011ed3d5d378cdca5fe7bac4b", + "space_private": false, + "runtime": { + "stage": "RUNNING", + "hardware": { + "current": "cpu-basic", + "requested": "cpu-basic" + }, + "replicas": { + "current": 1, + "requested": 1 + }, + "domain": "thejackbright-polyguard-openenv.hf.space" + }, + "openenv_validation": { + "command": "uv run openenv validate --url https://thejackbright-polyguard-openenv.hf.space", + "passed": true, + "passed_count": 6, + "total_count": 6, + "failed_criteria": [] + } +} diff --git a/docs/results/hf_sweep_summary.json b/docs/results/hf_sweep_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..84ba2e15d4664707736c04078a165add687580b9 --- /dev/null +++ b/docs/results/hf_sweep_summary.json @@ -0,0 +1,76 @@ +{ + "status": "ok", + "training_mode": "full", + "completed_models": 1, + "failed_or_skipped_models": 0, + "models": [ + { + "run_id": "qwen-qwen2-5-3b-instruct", + "training_mode": "full", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen2.5-3B", + "status": "completed", + "error": "", + "sft_backend": "trl_transformers", + "sft_examples": 2000, + "sft_train_loss": 0.15688225453009363, + "sft_runtime": 715.2908, + "grpo_backend": "trl_transformers", + "grpo_records": 2000, + "grpo_avg_reward": 0.767, + "sft_inference_reward": 0.781, + "sft_valid_rate": 1.0, + "sft_latency_seconds": 2.863, + "grpo_inference_reward": 0.726, + "grpo_valid_rate": 1.0, + "grpo_latency_seconds": 3.681, + "train_holdout_gap": 0.041, + "fallback_detected": false, + "reward_range_ok": true, + "reward_range_failures": [], + "exploit_rate": 0.411, + "legal_rate": 0.93, + "candidate_diversity": 0.003, + "top_candidate_rate": 0.668, + "reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.929, + "safety_delta_score": 0.497, + "burden_improvement_score": 0.469, + "disease_stability_score": 0.861, + "dosing_quality_score": 0.526, + "abstention_quality_score": 0.56, + "efficiency_score": 0.849, + "process_fidelity_score": 0.856, + "explanation_grounding_score": 0.795, + "anti_cheat_score": 0.589, + "uncertainty_calibration_score": 0.747 + }, + "primary_reward_channels": { + "safety_legality": 0.816, + "clinical_improvement": 0.609, + "dosing_quality": 0.543, + "process_integrity": 0.875 + }, + "artifact_paths": { + "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "grpo": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter" + } + } + ], + "charts": { + "sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png", + "sft_loss_curves": "outputs/plots/sft_loss_curves.png", + "qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png", + "qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png", + "sft_validity_reward": "outputs/plots/sft_validity_reward.png", + "grpo_reward_curves": "outputs/plots/grpo_reward_curves.png", + "qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png", + "reward_component_bars": "outputs/plots/reward_component_bars.png", + "anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png", + "train_holdout_gap": "outputs/plots/train_holdout_gap.png", + "inference_validity_reward": "outputs/plots/inference_validity_reward.png", + "inference_latency_validity": "outputs/plots/inference_latency_validity.png" + } +} \ No newline at end of file diff --git a/docs/results/hf_training_status.json b/docs/results/hf_training_status.json new file mode 100644 index 0000000000000000000000000000000000000000..0643d5242678c705d72994ce2033210cd84e2c2b --- /dev/null +++ b/docs/results/hf_training_status.json @@ -0,0 +1,227 @@ +{ + "status": "ok", + "started_at": 1777180786.0648105, + "finished_at": 1777188659.441074, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.507 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.695 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 737.28 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 6885.399 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 15.74 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-3B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 20.985 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-3B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 26.691 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "reuse_artifact", + "grpo_adapter", + "/app/checkpoints/grpo_adapter" + ], + "returncode": 0, + "elapsed_seconds": 0.0 + }, + { + "args": [ + "python", + "scripts/evaluate_baselines.py" + ], + "returncode": 0, + "elapsed_seconds": 4.163 + }, + { + "args": [ + "python", + "scripts/evaluate_all.py" + ], + "returncode": 0, + "elapsed_seconds": 3.798 + }, + { + "args": [ + "python", + "scripts/evaluate_compare_runs.py", + "--baseline", + "outputs/reports/baselines.json", + "--candidate", + "outputs/reports/benchmark_report.json", + "--output", + "outputs/reports/improvement_report.json" + ], + "returncode": 0, + "elapsed_seconds": 0.034 + }, + { + "args": [ + "python", + "scripts/benchmark_inference.py" + ], + "returncode": 0, + "elapsed_seconds": 2.39 + }, + { + "args": [ + "python", + "scripts/run_robustness_suite.py" + ], + "returncode": 0, + "elapsed_seconds": 2.692 + }, + { + "args": [ + "python", + "scripts/generate_hf_training_report.py", + "--mode", + "full" + ], + "returncode": 0, + "elapsed_seconds": 2.078 + } + ], + "artifact_repo_id": "adithya9903/polyguard-openenv-training-3b-artifacts", + "training_mode": "full", + "model_sweep": [ + "Qwen/Qwen2.5-3B-Instruct" + ], + "improved": true, + "anti_hacking_passed": false, + "completed_run_ids": [ + "qwen-qwen2-5-3b-instruct" + ] +} \ No newline at end of file diff --git a/docs/results/improvement_report.json b/docs/results/improvement_report.json new file mode 100644 index 0000000000000000000000000000000000000000..886c258a6e289158e33375ff020b7746cee4b7fb --- /dev/null +++ b/docs/results/improvement_report.json @@ -0,0 +1,19 @@ +{ + "status": "ok", + "baseline": "outputs/reports/baselines.json", + "candidate": "outputs/reports/benchmark_report.json", + "deltas": { + "avg_reward": 0.025833, + "legality_rate": 0.0, + "success_rate": 0.0, + "avg_process_fidelity": 0.92, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0 + }, + "gate": { + "avg_reward_up": true, + "legality_up": true, + "success_up": true + }, + "improved": true +} \ No newline at end of file diff --git a/docs/results/improvement_report_benchmark.json b/docs/results/improvement_report_benchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..5d8b11e47a79b24417c790054095326e72258681 --- /dev/null +++ b/docs/results/improvement_report_benchmark.json @@ -0,0 +1,19 @@ +{ + "status": "ok", + "baseline": "outputs/reports/baselines.json", + "candidate": "outputs/reports/benchmark_report.json", + "deltas": { + "avg_reward": -0.0025, + "legality_rate": 0.0, + "success_rate": 0.0, + "avg_process_fidelity": 0.92, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0 + }, + "gate": { + "avg_reward_up": false, + "legality_up": true, + "success_up": true + }, + "improved": false +} \ No newline at end of file diff --git a/docs/results/inference_benchmark.json b/docs/results/inference_benchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..488238c8abed1d68e070cbfe6119f29ab485ee0e --- /dev/null +++ b/docs/results/inference_benchmark.json @@ -0,0 +1,43 @@ +{ + "status": "ok", + "runs": [ + { + "run": 0, + "provider": "transformers_ranker_fallback", + "candidate_id": "cand_04", + "latency_ms": 1751.989, + "rationale": "Transformers fallback selected cand_04 via local ranker; active_model_enabled=False; active_model_available=False." + }, + { + "run": 1, + "provider": "transformers_ranker_fallback", + "candidate_id": "cand_02", + "latency_ms": 0.166, + "rationale": "Transformers fallback selected cand_02 via local ranker; active_model_enabled=False; active_model_available=False." + }, + { + "run": 2, + "provider": "transformers_ranker_fallback", + "candidate_id": "cand_04", + "latency_ms": 0.157, + "rationale": "Transformers fallback selected cand_04 via local ranker; active_model_enabled=False; active_model_available=False." + }, + { + "run": 3, + "provider": "transformers_ranker_fallback", + "candidate_id": "cand_04", + "latency_ms": 0.164, + "rationale": "Transformers fallback selected cand_04 via local ranker; active_model_enabled=False; active_model_available=False." + }, + { + "run": 4, + "provider": "transformers_ranker_fallback", + "candidate_id": "cand_04", + "latency_ms": 0.153, + "rationale": "Transformers fallback selected cand_04 via local ranker; active_model_enabled=False; active_model_available=False." + } + ], + "avg_latency_ms": 350.526, + "provider_requested": "transformers", + "model": "Qwen/Qwen2.5-0.5B-Instruct" +} \ No newline at end of file diff --git a/docs/results/inference_latency_validity.png b/docs/results/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..764aa7851cac76b39b892e49122daa7f4a321c43 Binary files /dev/null and b/docs/results/inference_latency_validity.png differ diff --git a/docs/results/inference_validity_reward.png b/docs/results/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..98bd4b15d4d1d11521e8e7234a1f45b6de8c3d58 Binary files /dev/null and b/docs/results/inference_validity_reward.png differ diff --git a/docs/results/legality_rate.png b/docs/results/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b4c1e418b0262902ad1c9ad4818f4d9b22a152d0 Binary files /dev/null and b/docs/results/legality_rate.png differ diff --git a/docs/results/planner_grpo.json b/docs/results/planner_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e9aa4742688c7625d2182953907f8df1b35c7f --- /dev/null +++ b/docs/results/planner_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.77625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.0, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.92, + "exploit_detection_count": 4.0, + "reward_columns": { + "format_compliance_score": 0.9990000000000001, + "candidate_alignment_score": 0.9990000000000001, + "legality_score": 0.9990000000000001, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000002, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.73, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.6663333333333333, + "uncertainty_calibration_score": 0.8699999999999998 + } +} \ No newline at end of file diff --git a/docs/results/plot_index.json b/docs/results/plot_index.json new file mode 100644 index 0000000000000000000000000000000000000000..653af59e59bdb01a0215534ae5e3274a36b60d0e --- /dev/null +++ b/docs/results/plot_index.json @@ -0,0 +1,9 @@ +{ + "plots": [ + "/app/outputs/plots/avg_reward.png", + "/app/outputs/plots/legality_rate.png", + "/app/outputs/plots/success_rate.png", + "/app/outputs/plots/avg_process_fidelity.png", + "/app/outputs/plots/policy_stack_avg_reward.png" + ] +} \ No newline at end of file diff --git a/docs/results/policy_stack_avg_reward.png b/docs/results/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b28dc57ac180e83b38194b17251e3cf3a5a941da Binary files /dev/null and b/docs/results/policy_stack_avg_reward.png differ diff --git a/docs/results/postsave_inference.json b/docs/results/postsave_inference.json new file mode 100644 index 0000000000000000000000000000000000000000..6f146ec46e61500fb1904fe354c75dfb860c0700 --- /dev/null +++ b/docs/results/postsave_inference.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "adapter", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 3.681, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 67-year-old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has no significant past medical history. He is a current smoker and drinks", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.941, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 46-year-old female with a history of hypertension, hyperlipidemia, and type 2 diabetes. She has been on metformin for her diabetes and lisinopril for her hypertension. The patient is currently being evaluated for possible coronary artery disease (CAD) due to symptoms of", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 3.634, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 65 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or emergency department visits. His most", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.636, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 67 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or emergency department visits. His most", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 3.548, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 57 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or surgeries. His most recent A", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.647, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/postsave_inference_smoke.json b/docs/results/postsave_inference_smoke.json new file mode 100644 index 0000000000000000000000000000000000000000..9e752252d57d5a96a5438d816904747db17731e1 --- /dev/null +++ b/docs/results/postsave_inference_smoke.json @@ -0,0 +1,23 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 1, + "valid_rate": 1.0, + "avg_env_reward": 0.717, + "avg_latency_seconds": 5.523, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_123\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "}\n\nSure, I can help you choose a candidate ID based on the information provided. Please provide me with the list of candidate IDs and their respective rationales so that I can make an informed decision. If there are multiple candidates with similar rationales, please let me know which one aligns best with your preferences or needs. Additionally, if you have any specific criteria for choosing a candidate (e.g", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 5.523, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/README.md b/docs/results/qwen_completed_runs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fcba3b66b45f901bfd53a301208f0fa5fb55603f --- /dev/null +++ b/docs/results/qwen_completed_runs/README.md @@ -0,0 +1,33 @@ +# Qwen Completed Runs Bundle + +Created: 2026-04-26T03:28:38.201754+00:00 + +## Provenance + +- The HF artifact repository currently contains only `.gitattributes`, so final uploaded remote JSON/plots are not available yet. +- The live Space status proves Qwen 0.5B and Qwen 1.5B completed SFT, GRPO, post-save inference, and ablations where listed in `reports/remote_status/`. +- The SFT histories, post-save SFT inference JSON, and generated SFT charts are from locally available mirrored sweep files under `outputs/reports/sweeps/`. +- The combined GRPO/reward charts copied into `charts/local_available_combined/` are the currently available local chart artifacts and should be replaced by the final uploaded sweep charts after the Space completes. + +## Summary + +| Model | Remote SFT | Remote GRPO | Local SFT loss | Local post-save reward | Local valid rate | +| --- | --- | --- | ---: | ---: | ---: | +| Qwen 0.5B | True | True | 0.1923 | 0.726 | 1.000 | +| Qwen 1.5B | True | True | 0.1152 | 0.726 | 1.000 | + +## Key Generated Charts + +- `charts/generated/qwen_0_5b_sft_training_loss.png` +- `charts/generated/qwen_1_5b_sft_training_loss.png` +- `charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png` +- `charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png` +- `charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png` +- `charts/generated/qwen_0_5b_1_5b_postsave_reward.png` + +## Folders + +- `reports/local_available_sft/qwen_0_5b/` and `reports/local_available_sft/qwen_1_5b/`: SFT JSON, history, metadata, post-save inference. +- `reports/remote_status/`: live HF status snapshot and completed command records for remote SFT/GRPO stages. +- `charts/generated/`: newly generated model-specific and comparison charts. +- `charts/local_available_combined/`: existing broader evaluation/reward charts available locally now. diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b95e45c9a250061a3b1a9b7c64e65e76865527d3 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..4af661e82ad7e399bb7d2febe50e21a77b17525f Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_latency.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..7f1f2d82a3b7ad44087b43911bf15229097ff167 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_postsave_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..dc080d9c70f8ba2a42b057f21a03f174d5194fc3 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..69105f0a757971ca7e6efffaacc2b196b645fb3f Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_1_5b_sft_runtime.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..45f78128f0917935a330806909768c98eeaf2697 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_learning_rate.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..5cd2796a45edb4de3ed9709b5c2e6e48e6659c04 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_token_accuracy.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e66e10e1d3f0433ae5e4a216aaaf9639f288de Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_sft_training_loss.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..10dbe25be26876985fa5e089d44d5393a3581075 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..cd1b3f3d9b1fab40fa3ffffaa1e2b7eaa89a4a06 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..befa6e73ca5610dde8049309cd258958ac19d68a Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_learning_rate.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..af516737117c9a56d285ee6fc2b4d9d43dba87f6 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_token_accuracy.png differ diff --git a/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7b1a3e66d2d547d9809f2a80dd472ebde13cc17d Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/generated/qwen_1_5b_sft_training_loss.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..9ee2415b64aa6d1e4357754bd432cfc43dbf5091 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/anti_cheat_failure_rates.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..edb2fa8c25074d88c90bce5c243af90dcb28e1c6 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/avg_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png b/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b8b1c8d550e72424ffeef18cd8fff38ce8c91cab Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/grpo_reward_curves.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..0fb4d13ec904f9d31e23bc155fe571425145913c Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_latency_validity.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..635d3af233d076393ea09b507584d2f51c07b5a1 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/inference_validity_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b28dc57ac180e83b38194b17251e3cf3a5a941da Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/policy_stack_avg_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4b35e432d6d777827f6bf0dc189bfc74b4427125 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_grpo_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1ec58084d2c79f340541654e5d99906a3ae592ac Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_loss.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2773c4f16e553eeffc43c9ef348a988b77735c52 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/qwen_model_sft_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png b/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fc18c8433fb28860795036a1aab24f9aa05f61af Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/reward_component_bars.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..8d5bf10a57fdc8264485616fd51d637f0709f104 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_loss_curves.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..5616296656c79ff7946479ce233f9b9e7c582a05 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_validity_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4765e95fbbc1f1ed2f8a6686909241a75486caa5 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/sft_vs_grpo_reward.png differ diff --git a/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png b/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..3bf8436ec672a1cb1875c178b9369e85e5aca2e8 Binary files /dev/null and b/docs/results/qwen_completed_runs/charts/local_available_combined/train_holdout_gap.png differ diff --git a/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json b/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..e9299083f8d91a48a5d1417ec6d960946717d7e6 --- /dev/null +++ b/docs/results/qwen_completed_runs/manifests/qwen_0_5b_1_5b_summary.json @@ -0,0 +1,50 @@ +[ + { + "key": "qwen_0_5b", + "label": "Qwen 0.5B", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "run_id": "qwen-qwen2-5-0-5b-instruct", + "remote_completed_command_count": 6, + "remote_sft_completed": true, + "remote_grpo_completed": true, + "remote_ablation_completed": true, + "local_available_files": [ + "run_metadata.json", + "sft_trl_run.json", + "sft_history.json", + "postsave_inference_sft.json" + ], + "local_sft_backend": "trl_transformers", + "local_sft_examples_used": 2000, + "local_sft_train_loss": 0.19233327957964502, + "local_sft_train_runtime": 234.6302, + "local_sft_samples_per_second": 17.048, + "local_postsave_valid_rate": 1.0, + "local_postsave_avg_reward": 0.726, + "local_postsave_avg_latency_seconds": 1.839 + }, + { + "key": "qwen_1_5b", + "label": "Qwen 1.5B", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "run_id": "qwen-qwen2-5-1-5b-instruct", + "remote_completed_command_count": 6, + "remote_sft_completed": true, + "remote_grpo_completed": true, + "remote_ablation_completed": true, + "local_available_files": [ + "run_metadata.json", + "sft_trl_run.json", + "sft_history.json", + "postsave_inference_sft.json" + ], + "local_sft_backend": "trl_transformers", + "local_sft_examples_used": 2000, + "local_sft_train_loss": 0.11515871361242898, + "local_sft_train_runtime": 483.7085, + "local_sft_samples_per_second": 8.269, + "local_postsave_valid_rate": 1.0, + "local_postsave_avg_reward": 0.726, + "local_postsave_avg_latency_seconds": 2.158 + } +] \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_0_5b/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/local_available_sft/qwen_1_5b/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json b/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json new file mode 100644 index 0000000000000000000000000000000000000000..d1b7b1a5957e0e14f32d42e1ca77788ac2a9b540 --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/remote_status/live_hf_status_snapshot.json @@ -0,0 +1,325 @@ +{ + "created_at_utc": "2026-04-26T03:28:38.201754+00:00", + "space": "TheJackBright/polyguard-openenv-training-full", + "artifact_repo": "TheJackBright/polyguard-openenv-training-full-artifacts", + "runtime": { + "stage": "RUNNING", + "hardware": "a10g-large", + "requested_hardware": "a10g-large" + }, + "space_status": { + "status": "running", + "started_at": 1777162756.623835, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.577 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.86 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 736.955 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ] + }, + "artifact_repo_file_count": 1, + "artifact_repo_files_head": [ + ".gitattributes" + ], + "current_command": "python scripts/train_grpo_trl.py --model-id Qwen/Qwen2.5-3B-Instruct --prompts-path data/processed/training_corpus_grpo_prompts.jsonl --output-dir checkpoints/sweeps/qwen-qwen2-5-3b-instruct --report-path outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json --max-prompts 0 --max-steps 0 --epochs 1.0 --batch-size 2 --grad-accum 1 --num-generations 2 --max-prompt-length 384 --max-completion-length 64 --learning-rate 1e-06 --use-unsloth", + "provenance_note": "The remote training Space has completed Qwen 0.5B and Qwen 1.5B commands, but the artifact repo has not uploaded run files yet. Remote GRPO JSONs and GRPO histories should replace the local_available placeholders after final upload." +} \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json b/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json new file mode 100644 index 0000000000000000000000000000000000000000..5e9349464595903be86a379dea8d7ccac4ede0bc --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_1_5b_remote_stage_durations.json @@ -0,0 +1,74 @@ +[ + { + "model": "Qwen 0.5B", + "stage": "SFT", + "elapsed_seconds": 257.387, + "returncode": 0 + }, + { + "model": "Qwen 0.5B", + "stage": "GRPO", + "elapsed_seconds": 4230.645, + "returncode": 0 + }, + { + "model": "Qwen 0.5B", + "stage": "Other", + "elapsed_seconds": 7.303, + "returncode": 0 + }, + { + "model": "Qwen 0.5B", + "stage": "SFT inference", + "elapsed_seconds": 15.201, + "returncode": 0 + }, + { + "model": "Qwen 0.5B", + "stage": "GRPO inference", + "elapsed_seconds": 18.461, + "returncode": 0 + }, + { + "model": "Qwen 0.5B", + "stage": "Ablation", + "elapsed_seconds": 3.989, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "SFT", + "elapsed_seconds": 454.278, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "GRPO", + "elapsed_seconds": 5118.654, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "Other", + "elapsed_seconds": 10.6, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "SFT inference", + "elapsed_seconds": 17.128, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "GRPO inference", + "elapsed_seconds": 21.528, + "returncode": 0 + }, + { + "model": "Qwen 1.5B", + "stage": "Ablation", + "elapsed_seconds": 4.001, + "returncode": 0 + } +] \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json b/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json new file mode 100644 index 0000000000000000000000000000000000000000..1d39208d9e157f7cfe48f125c169871661c72f4a --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/remote_status/qwen_0_5b_completed_commands.json @@ -0,0 +1,126 @@ +[ + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + } +] \ No newline at end of file diff --git a/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json b/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json new file mode 100644 index 0000000000000000000000000000000000000000..34d71a236845d1260e8bf13e93d19883913468cc --- /dev/null +++ b/docs/results/qwen_completed_runs/reports/remote_status/qwen_1_5b_completed_commands.json @@ -0,0 +1,126 @@ +[ + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + } +] \ No newline at end of file diff --git a/docs/results/qwen_model_grpo_reward.png b/docs/results/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..feb30b2964e85202cbd63cba29fcd2fe8a7f0e76 Binary files /dev/null and b/docs/results/qwen_model_grpo_reward.png differ diff --git a/docs/results/qwen_model_sft_loss.png b/docs/results/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..13f8fb2e7228902f1fe04146ded3ad25039b7e95 Binary files /dev/null and b/docs/results/qwen_model_sft_loss.png differ diff --git a/docs/results/qwen_model_sft_reward.png b/docs/results/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d50ce0e76d003559d01b3014fade4e1f1336bde1 Binary files /dev/null and b/docs/results/qwen_model_sft_reward.png differ diff --git a/docs/results/reward_component_bars.png b/docs/results/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..91fd9b5c6c5d46e5a9e51aeb8731d3cdfcf2a7e5 --- /dev/null +++ b/docs/results/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e967e0537bb1c49091b534231072fd2e4750d4650c02479fd292a1d5f543ae +size 123446 diff --git a/docs/results/risk_train.json b/docs/results/risk_train.json new file mode 100644 index 0000000000000000000000000000000000000000..6482da8f83639a916904d9e9ae558df6212feb0e --- /dev/null +++ b/docs/results/risk_train.json @@ -0,0 +1,6 @@ +{ + "dataset_size": 180.0, + "status": "trained", + "train_mae": 0.0033, + "model_path": "outputs/models/tabular_risk.pkl" +} \ No newline at end of file diff --git a/docs/results/robustness.json b/docs/results/robustness.json new file mode 100644 index 0000000000000000000000000000000000000000..9edf7a91e469f060323ebbaf23cb668965a2213d --- /dev/null +++ b/docs/results/robustness.json @@ -0,0 +1,10 @@ +{ + "missing_labs_safety_rate": 0.666667, + "noisy_dose_info_safety_rate": 1.0, + "conflicting_meds_safety_rate": 1.0, + "alias_noise_safety_rate": 1.0, + "hidden_duplicate_detection_rate": 1.0, + "wrong_candidate_id_resilience": 1.0, + "stale_evidence_safety_rate": 1.0, + "delayed_ade_manifestation_safety_rate": 1.0 +} \ No newline at end of file diff --git a/docs/results/sft_loss_curves.png b/docs/results/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..738eee194fc1f44b476dfef161bba19be96f7b00 Binary files /dev/null and b/docs/results/sft_loss_curves.png differ diff --git a/docs/results/sft_run.json b/docs/results/sft_run.json new file mode 100644 index 0000000000000000000000000000000000000000..76228de881515a4ecc37b27f08442f85307b0f68 --- /dev/null +++ b/docs/results/sft_run.json @@ -0,0 +1,9 @@ +{ + "status": "ok", + "backend": "fallback_sklearn", + "examples_used": 51, + "train_accuracy": 0.5098, + "artifact_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/sft_policy_fallback.json", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'." +} \ No newline at end of file diff --git a/docs/results/sft_trl_run.json b/docs/results/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..e49c30bdde3d50be652809e01980974b13691c98 --- /dev/null +++ b/docs/results/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 715.2908, + "train_loss": 0.15688225453009363, + "train_metrics": { + "train_runtime": 715.2908, + "train_samples_per_second": 5.592, + "train_steps_per_second": 2.796, + "total_flos": 2.949554402500608e+16, + "train_loss": 0.15688225453009363 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/sft_validity_reward.png b/docs/results/sft_validity_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b6c1317caf732177559402d05e3a16b6d9e03bd9 Binary files /dev/null and b/docs/results/sft_validity_reward.png differ diff --git a/docs/results/sft_vs_grpo_reward.png b/docs/results/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..d3df1daf390065d80a17d53e649626f8534d8549 Binary files /dev/null and b/docs/results/sft_vs_grpo_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1b2faf9c8218a4e723aaac00e7a7f2cddf0538 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/artifact_repo_listing.json @@ -0,0 +1,9 @@ +{ + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..30df76ac40b24370c4d47f38a5b392e8e7c8b36f Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..354ee4f38019cfceb7db848c00ee7bda6270c162 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a334d8db37904ac9ab47a582cd1efb83545a7027 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..5d068d5f289f2e688017d55fba2219c1d0154167 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json new file mode 100644 index 0000000000000000000000000000000000000000..32d4f98fc269daee5221d67244ea0c995322747f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_polyguard_report.json @@ -0,0 +1,133 @@ +{ + "status": "ok", + "judge": "PolyGuard verifier/reward system", + "llm_as_judge": false, + "matched_seeds": [ + 8000, + 8001, + 8002, + 8003, + 8004, + 8005, + 8006, + 8007 + ], + "summaries": { + "basic_llm": { + "episodes": 8, + "avg_reward": 0.762, + "avg_latency_seconds": 0.0038, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.25, + "candidate_diversity": 1 + }, + "sft_policy": { + "episodes": 8, + "avg_reward": 0.818, + "avg_latency_seconds": 0.0012, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + }, + "full_polyguard_pipeline": { + "episodes": 8, + "avg_reward": 0.805, + "avg_latency_seconds": 0.3876, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + } + }, + "pipeline_minus_basic_reward_delta": 0.043, + "deltas": [ + { + "seed": 8000, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8001, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8002, + "basic_reward": 0.777, + "pipeline_reward": 0.804, + "reward_delta": 0.027, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8003, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8004, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8005, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8006, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8007, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + } + ], + "notes": [ + "basic_llm is an evaluation-only prompt-style proxy that selects the first legal candidate without verifier reranking.", + "sft_policy is an evaluation-only SFT-style safety ranker over the same candidate set.", + "full_polyguard_pipeline runs the orchestrated LLM+bandit stack and scores through the same verifier." + ] +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json new file mode 100644 index 0000000000000000000000000000000000000000..adec7032d7fae6ba4ca73ed347e0176c38aa961f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/hf_status_snapshot.json @@ -0,0 +1,311 @@ +{ + "status": "running", + "started_at": 1777162756.623835, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.577 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.86 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 736.955 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "log_tail": "\u2588\u2588\u2588\u2588\u2588\u258a| 1965/2000 [11:41<00:10, 3.22it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n \n{'loss': 0.0449, 'grad_norm': 0.8585970401763916, 'learning_rate': 3.7e-07, 'num_tokens': 1350951.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n \n{'loss': 0.0518, 'grad_norm': 0.7478350400924683, 'learning_rate': 3.6e-07, 'num_tokens': 1351975.0, 'mean_token_accuracy': 0.9755381345748901, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n \n{'loss': 0.0442, 'grad_norm': 0.8791924715042114, 'learning_rate': 3.5000000000000004e-07, 'num_tokens': 1352578.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n \n{'loss': 0.0488, 'grad_norm': 0.6195839047431946, 'learning_rate': 3.4000000000000003e-07, 'num_tokens': 1353602.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n \n{'loss': 0.0047, 'grad_norm': 0.8639671802520752, 'learning_rate': 3.3e-07, 'num_tokens': 1353784.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n \n{'loss': 0.0048, 'grad_norm': 0.8560010194778442, 'learning_rate': 3.2e-07, 'num_tokens': 1353966.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n \n{'loss': 0.0382, 'grad_norm': 0.8542295694351196, 'learning_rate': 3.1000000000000005e-07, 'num_tokens': 1354990.0, 'mean_token_accuracy': 0.9823874831199646, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n \n{'loss': 0.033, 'grad_norm': 0.7632898688316345, 'learning_rate': 3.0000000000000004e-07, 'num_tokens': 1355593.0, 'mean_token_accuracy': 0.9833610653877258, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n \n{'loss': 0.0582, 'grad_norm': 0.7546073198318481, 'learning_rate': 2.9000000000000003e-07, 'num_tokens': 1356617.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n \n{'loss': 0.0607, 'grad_norm': 0.9100231528282166, 'learning_rate': 2.8e-07, 'num_tokens': 1357641.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n \n{'loss': 0.0522, 'grad_norm': 0.9831849932670593, 'learning_rate': 2.7e-07, 'num_tokens': 1358665.0, 'mean_token_accuracy': 0.9726027250289917, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n \n{'loss': 0.0455, 'grad_norm': 0.7770227789878845, 'learning_rate': 2.6e-07, 'num_tokens': 1359268.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n \n{'loss': 0.043, 'grad_norm': 0.9285680055618286, 'learning_rate': 2.5000000000000004e-07, 'num_tokens': 1359871.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n \n{'loss': 0.0475, 'grad_norm': 0.725820004940033, 'learning_rate': 2.4000000000000003e-07, 'num_tokens': 1360895.0, 'mean_token_accuracy': 0.9784736037254333, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n \n{'loss': 0.0523, 'grad_norm': 0.9508711099624634, 'learning_rate': 2.3000000000000002e-07, 'num_tokens': 1361498.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n \n{'loss': 0.0461, 'grad_norm': 0.9076665639877319, 'learning_rate': 2.2e-07, 'num_tokens': 1362101.0, 'mean_token_accuracy': 0.980033278465271, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n \n{'loss': 0.0049, 'grad_norm': 0.8733372092247009, 'learning_rate': 2.1000000000000003e-07, 'num_tokens': 1362283.0, 'mean_token_accuracy': 1.0, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n \n{'loss': 0.0499, 'grad_norm': 1.0219769477844238, 'learning_rate': 2.0000000000000002e-07, 'num_tokens': 1362886.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n \n{'loss': 0.047, 'grad_norm': 0.6855125427246094, 'learning_rate': 1.9e-07, 'num_tokens': 1363910.0, 'mean_token_accuracy': 0.9794520735740662, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n \n{'loss': 0.053, 'grad_norm': 0.9592626094818115, 'learning_rate': 1.8e-07, 'num_tokens': 1364513.0, 'mean_token_accuracy': 0.9717137813568115, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n \n{'loss': 0.0634, 'grad_norm': 0.9822715520858765, 'learning_rate': 1.7000000000000001e-07, 'num_tokens': 1365537.0, 'mean_token_accuracy': 0.9696673154830933, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n \n{'loss': 0.005, 'grad_norm': 0.9051101207733154, 'learning_rate': 1.6e-07, 'num_tokens': 1365719.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n \n{'loss': 0.057, 'grad_norm': 0.7732815742492676, 'learning_rate': 1.5000000000000002e-07, 'num_tokens': 1366743.0, 'mean_token_accuracy': 0.9716242551803589, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n \n{'loss': 0.0488, 'grad_norm': 1.0130807161331177, 'learning_rate': 1.4e-07, 'num_tokens': 1367346.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n \n{'loss': 0.0502, 'grad_norm': 0.7733030319213867, 'learning_rate': 1.3e-07, 'num_tokens': 1368370.0, 'mean_token_accuracy': 0.976516604423523, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n \n{'loss': 0.033, 'grad_norm': 0.8099549412727356, 'learning_rate': 1.2000000000000002e-07, 'num_tokens': 1368973.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n \n{'loss': 0.0505, 'grad_norm': 0.8513318300247192, 'learning_rate': 1.1e-07, 'num_tokens': 1369576.0, 'mean_token_accuracy': 0.9733777046203613, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n \n{'loss': 0.0471, 'grad_norm': 0.8666603565216064, 'learning_rate': 1.0000000000000001e-07, 'num_tokens': 1370179.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n \n{'loss': 0.0046, 'grad_norm': 0.8277124166488647, 'learning_rate': 9e-08, 'num_tokens': 1370361.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n \n{'loss': 0.0491, 'grad_norm': 0.7712334990501404, 'learning_rate': 8e-08, 'num_tokens': 1370964.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n \n{'loss': 0.037, 'grad_norm': 0.8775883316993713, 'learning_rate': 7e-08, 'num_tokens': 1371988.0, 'mean_token_accuracy': 0.980430543422699, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n \n{'loss': 0.0377, 'grad_norm': 0.7055721282958984, 'learning_rate': 6.000000000000001e-08, 'num_tokens': 1373012.0, 'mean_token_accuracy': 0.9814090132713318, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n \n{'loss': 0.005, 'grad_norm': 0.8954693675041199, 'learning_rate': 5.0000000000000004e-08, 'num_tokens': 1373194.0, 'mean_token_accuracy': 1.0, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n \n{'loss': 0.0314, 'grad_norm': 0.7444577217102051, 'learning_rate': 4e-08, 'num_tokens': 1373797.0, 'mean_token_accuracy': 0.9883527159690857, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'loss': 0.0525, 'grad_norm': 1.007545828819275, 'learning_rate': 3.0000000000000004e-08, 'num_tokens': 1374400.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'train_runtime': 714.3473, 'train_samples_per_second': 5.6, 'train_steps_per_second': 2.8, 'train_loss': 0.1561080440459773, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.80it/s]\nsft_trl_done\n$ python scripts/train_grpo_trl.py --model-id Qwen/Qwen2.5-3B-Instruct --prompts-path data/processed/training_corpus_grpo_prompts.jsonl --output-dir checkpoints/sweeps/qwen-qwen2-5-3b-instruct --report-path outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json --max-prompts 0 --max-steps 0 --epochs 1.0 --batch-size 2 --grad-accum 1 --num-generations 2 --max-prompt-length 384 --max-completion-length 64 --learning-rate 1e-06 --use-unsloth\n" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..fb9aa967b6aba73ae13fe8bf2e2bc9953aa17ab0 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/manifest.json @@ -0,0 +1,237 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system", + "bundle_zip": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/qwen_0_5b_1_5b_evidence.zip", + "mirrored_file_count": 56 +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json new file mode 100644 index 0000000000000000000000000000000000000000..61be0069bb7d7a3cade76d1a843b605934ce16c2 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/mirrored_files.json @@ -0,0 +1,58 @@ +[ + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png", + "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png" +] diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b8a16a69c129c24b20c8ab712e219662b853e8ac Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..b02893a92db120bde2f2a629c680c7191230edeb Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..a084c777866c2316a63e3ab9a6339d45606517a5 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..17f42d1ba8e5ed4aaf91fc331e9057d45b539b10 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/policy_ablation_report.json @@ -0,0 +1,150 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + }, + "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/grpo_ablation_report.json" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2b33f8c40f985870bbf6ad986307cf9988ae229d Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e624303fbcd1dcbc7e67edb578055310873bc7ad Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..d5d8d458cfe55b068060be5cbed93d4f3ea2e15f Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..eaf9687f4bd8f1fddf41434e8317105634a2366a Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..db33a7a97a9a7470e3927df08f1b2c61a5331e05 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..692ae055aa330d28ddecde01f82d2e0fb984de79 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..ffd982a07fec0d80dff092afea033c65d3a06552 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..91f0c0075c563b6915e2f8225a659d9f88c08bc8 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee344753fde4ea2476b340dbf618a9b12b1f94c Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..15a7de44aa9ec407cb7a8647624a67edb8bb38c6 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..d36b471da2f0902e2c513e98a16098be6ec9a515 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..a8de709d9201c4d7a4fb502d3045104c0a8017a5 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..642d57b9cb8a88d2a602adcbc92e220df2fc1c6c Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c72e897e7360ab9ceaafaaf36dd867414c0694d9 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json new file mode 100644 index 0000000000000000000000000000000000000000..26352611eeab0bb07b964c76298f3fa0f542711b --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/remote_stage_records.json @@ -0,0 +1,92 @@ +[ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } +] diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0b417999883105867eebe93b2fdb8bbdaf4b43 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbe17a795d04470e938101377019eadd6246670049fc717149bbe6d28888bae +size 142092 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..d10ae0a2d52e93bf7afd4fe5560708fd2cc8e794 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..f2d95c49345fee0c966ee899582d2fc611158764 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json b/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..559a39eee196526b0c832f9689a667397f11b61a --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b/submission_summary.json @@ -0,0 +1,235 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json new file mode 100644 index 0000000000000000000000000000000000000000..5f23072480e95f65785211fc47071cef6078b859 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/artifact_repo_listing.json @@ -0,0 +1,91 @@ +{ + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7093d3dc5b03c1710e6cd800244e1f0c3d6f0c Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..354ee4f38019cfceb7db848c00ee7bda6270c162 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a334d8db37904ac9ab47a582cd1efb83545a7027 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..5d068d5f289f2e688017d55fba2219c1d0154167 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json new file mode 100644 index 0000000000000000000000000000000000000000..0e50fc2cc335c77af3fcf4dde5e9e15b2927fcb8 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_polyguard_report.json @@ -0,0 +1,133 @@ +{ + "status": "ok", + "judge": "PolyGuard verifier/reward system", + "llm_as_judge": false, + "matched_seeds": [ + 8000, + 8001, + 8002, + 8003, + 8004, + 8005, + 8006, + 8007 + ], + "summaries": { + "basic_llm": { + "episodes": 8, + "avg_reward": 0.762, + "avg_latency_seconds": 0.0044, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.25, + "candidate_diversity": 1 + }, + "sft_policy": { + "episodes": 8, + "avg_reward": 0.818, + "avg_latency_seconds": 0.0012, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + }, + "full_polyguard_pipeline": { + "episodes": 8, + "avg_reward": 0.805, + "avg_latency_seconds": 0.5021, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + } + }, + "pipeline_minus_basic_reward_delta": 0.043, + "deltas": [ + { + "seed": 8000, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8001, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8002, + "basic_reward": 0.777, + "pipeline_reward": 0.804, + "reward_delta": 0.027, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8003, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8004, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8005, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8006, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8007, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + } + ], + "notes": [ + "basic_llm is an evaluation-only prompt-style proxy that selects the first legal candidate without verifier reranking.", + "sft_policy is an evaluation-only SFT-style safety ranker over the same candidate set.", + "full_polyguard_pipeline runs the orchestrated LLM+bandit stack and scores through the same verifier." + ] +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json new file mode 100644 index 0000000000000000000000000000000000000000..adec7032d7fae6ba4ca73ed347e0176c38aa961f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/hf_status_snapshot.json @@ -0,0 +1,311 @@ +{ + "status": "running", + "started_at": 1777162756.623835, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.577 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.86 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 736.955 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "log_tail": "\u2588\u2588\u2588\u2588\u2588\u258a| 1965/2000 [11:41<00:10, 3.22it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n \n{'loss': 0.0449, 'grad_norm': 0.8585970401763916, 'learning_rate': 3.7e-07, 'num_tokens': 1350951.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n \n{'loss': 0.0518, 'grad_norm': 0.7478350400924683, 'learning_rate': 3.6e-07, 'num_tokens': 1351975.0, 'mean_token_accuracy': 0.9755381345748901, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n \n{'loss': 0.0442, 'grad_norm': 0.8791924715042114, 'learning_rate': 3.5000000000000004e-07, 'num_tokens': 1352578.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n \n{'loss': 0.0488, 'grad_norm': 0.6195839047431946, 'learning_rate': 3.4000000000000003e-07, 'num_tokens': 1353602.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n \n{'loss': 0.0047, 'grad_norm': 0.8639671802520752, 'learning_rate': 3.3e-07, 'num_tokens': 1353784.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n \n{'loss': 0.0048, 'grad_norm': 0.8560010194778442, 'learning_rate': 3.2e-07, 'num_tokens': 1353966.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n \n{'loss': 0.0382, 'grad_norm': 0.8542295694351196, 'learning_rate': 3.1000000000000005e-07, 'num_tokens': 1354990.0, 'mean_token_accuracy': 0.9823874831199646, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n \n{'loss': 0.033, 'grad_norm': 0.7632898688316345, 'learning_rate': 3.0000000000000004e-07, 'num_tokens': 1355593.0, 'mean_token_accuracy': 0.9833610653877258, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n \n{'loss': 0.0582, 'grad_norm': 0.7546073198318481, 'learning_rate': 2.9000000000000003e-07, 'num_tokens': 1356617.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n \n{'loss': 0.0607, 'grad_norm': 0.9100231528282166, 'learning_rate': 2.8e-07, 'num_tokens': 1357641.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n \n{'loss': 0.0522, 'grad_norm': 0.9831849932670593, 'learning_rate': 2.7e-07, 'num_tokens': 1358665.0, 'mean_token_accuracy': 0.9726027250289917, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n \n{'loss': 0.0455, 'grad_norm': 0.7770227789878845, 'learning_rate': 2.6e-07, 'num_tokens': 1359268.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n \n{'loss': 0.043, 'grad_norm': 0.9285680055618286, 'learning_rate': 2.5000000000000004e-07, 'num_tokens': 1359871.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n \n{'loss': 0.0475, 'grad_norm': 0.725820004940033, 'learning_rate': 2.4000000000000003e-07, 'num_tokens': 1360895.0, 'mean_token_accuracy': 0.9784736037254333, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n \n{'loss': 0.0523, 'grad_norm': 0.9508711099624634, 'learning_rate': 2.3000000000000002e-07, 'num_tokens': 1361498.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n \n{'loss': 0.0461, 'grad_norm': 0.9076665639877319, 'learning_rate': 2.2e-07, 'num_tokens': 1362101.0, 'mean_token_accuracy': 0.980033278465271, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n \n{'loss': 0.0049, 'grad_norm': 0.8733372092247009, 'learning_rate': 2.1000000000000003e-07, 'num_tokens': 1362283.0, 'mean_token_accuracy': 1.0, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n \n{'loss': 0.0499, 'grad_norm': 1.0219769477844238, 'learning_rate': 2.0000000000000002e-07, 'num_tokens': 1362886.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n \n{'loss': 0.047, 'grad_norm': 0.6855125427246094, 'learning_rate': 1.9e-07, 'num_tokens': 1363910.0, 'mean_token_accuracy': 0.9794520735740662, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n \n{'loss': 0.053, 'grad_norm': 0.9592626094818115, 'learning_rate': 1.8e-07, 'num_tokens': 1364513.0, 'mean_token_accuracy': 0.9717137813568115, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n \n{'loss': 0.0634, 'grad_norm': 0.9822715520858765, 'learning_rate': 1.7000000000000001e-07, 'num_tokens': 1365537.0, 'mean_token_accuracy': 0.9696673154830933, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n \n{'loss': 0.005, 'grad_norm': 0.9051101207733154, 'learning_rate': 1.6e-07, 'num_tokens': 1365719.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n \n{'loss': 0.057, 'grad_norm': 0.7732815742492676, 'learning_rate': 1.5000000000000002e-07, 'num_tokens': 1366743.0, 'mean_token_accuracy': 0.9716242551803589, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n \n{'loss': 0.0488, 'grad_norm': 1.0130807161331177, 'learning_rate': 1.4e-07, 'num_tokens': 1367346.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n \n{'loss': 0.0502, 'grad_norm': 0.7733030319213867, 'learning_rate': 1.3e-07, 'num_tokens': 1368370.0, 'mean_token_accuracy': 0.976516604423523, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n \n{'loss': 0.033, 'grad_norm': 0.8099549412727356, 'learning_rate': 1.2000000000000002e-07, 'num_tokens': 1368973.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n \n{'loss': 0.0505, 'grad_norm': 0.8513318300247192, 'learning_rate': 1.1e-07, 'num_tokens': 1369576.0, 'mean_token_accuracy': 0.9733777046203613, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n \n{'loss': 0.0471, 'grad_norm': 0.8666603565216064, 'learning_rate': 1.0000000000000001e-07, 'num_tokens': 1370179.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n \n{'loss': 0.0046, 'grad_norm': 0.8277124166488647, 'learning_rate': 9e-08, 'num_tokens': 1370361.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n \n{'loss': 0.0491, 'grad_norm': 0.7712334990501404, 'learning_rate': 8e-08, 'num_tokens': 1370964.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n \n{'loss': 0.037, 'grad_norm': 0.8775883316993713, 'learning_rate': 7e-08, 'num_tokens': 1371988.0, 'mean_token_accuracy': 0.980430543422699, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n \n{'loss': 0.0377, 'grad_norm': 0.7055721282958984, 'learning_rate': 6.000000000000001e-08, 'num_tokens': 1373012.0, 'mean_token_accuracy': 0.9814090132713318, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n \n{'loss': 0.005, 'grad_norm': 0.8954693675041199, 'learning_rate': 5.0000000000000004e-08, 'num_tokens': 1373194.0, 'mean_token_accuracy': 1.0, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n \n{'loss': 0.0314, 'grad_norm': 0.7444577217102051, 'learning_rate': 4e-08, 'num_tokens': 1373797.0, 'mean_token_accuracy': 0.9883527159690857, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'loss': 0.0525, 'grad_norm': 1.007545828819275, 'learning_rate': 3.0000000000000004e-08, 'num_tokens': 1374400.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'train_runtime': 714.3473, 'train_samples_per_second': 5.6, 'train_steps_per_second': 2.8, 'train_loss': 0.1561080440459773, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.80it/s]\nsft_trl_done\n$ python scripts/train_grpo_trl.py --model-id Qwen/Qwen2.5-3B-Instruct --prompts-path data/processed/training_corpus_grpo_prompts.jsonl --output-dir checkpoints/sweeps/qwen-qwen2-5-3b-instruct --report-path outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json --max-prompts 0 --max-steps 0 --epochs 1.0 --batch-size 2 --grad-accum 1 --num-generations 2 --max-prompt-length 384 --max-completion-length 64 --learning-rate 1e-06 --use-unsloth\n" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..3da0dfffbe111a4157d841c447612b8e57a82adc --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/manifest.json @@ -0,0 +1,378 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system", + "bundle_zip": "submission_bundle/qwen_0_5b_1_5b_3b_evidence.zip", + "mirrored_file_count": 64 +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json new file mode 100644 index 0000000000000000000000000000000000000000..c770cc817b74a8bcae5ba1403b48e3a863d4318f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/mirrored_files.json @@ -0,0 +1,66 @@ +[ + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png", + "docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png" +] diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b8a16a69c129c24b20c8ab712e219662b853e8ac Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..b02893a92db120bde2f2a629c680c7191230edeb Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..a084c777866c2316a63e3ab9a6339d45606517a5 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..17f42d1ba8e5ed4aaf91fc331e9057d45b539b10 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_report.json @@ -0,0 +1,150 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + }, + "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/grpo_ablation_report.json" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2b33f8c40f985870bbf6ad986307cf9988ae229d Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..eeaee74949d469af50bcf55e1d66b8847e491f78 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..176b10578333a39d8ea7e5a324635821effc2343 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..82738c12da437f5bad55185490b0f85bbbf2b40d Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b0ac61084306b4eb2130df9f58696d2980c3f96f Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b9d1dcdb391fd27ab28296ac3874fb7ff02b5633 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..9c3af01d6fb94de66e47a204bfe5a545edd93330 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..47db263568828b5cee9fe01e3a103dad716e063d Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..297e6547bd5e074ff09271eee72d670824892595 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..ffd982a07fec0d80dff092afea033c65d3a06552 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..91f0c0075c563b6915e2f8225a659d9f88c08bc8 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee344753fde4ea2476b340dbf618a9b12b1f94c Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..2118ea2b4b2a5dee26ac5177eb0e2ae2bbd48bce Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..2782296497a7a8b3c5134a67aafb5b288e0113dd Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..a8de709d9201c4d7a4fb502d3045104c0a8017a5 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..642d57b9cb8a88d2a602adcbc92e220df2fc1c6c Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c72e897e7360ab9ceaafaaf36dd867414c0694d9 Binary files /dev/null and b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json new file mode 100644 index 0000000000000000000000000000000000000000..f3fe78e328e89d17c930dcf22e0d42cf569bdc56 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/remote_stage_records.json @@ -0,0 +1,101 @@ +[ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } +] diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0b417999883105867eebe93b2fdb8bbdaf4b43 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbe17a795d04470e938101377019eadd6246670049fc717149bbe6d28888bae +size 142092 diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..d10ae0a2d52e93bf7afd4fe5560708fd2cc8e794 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..f2d95c49345fee0c966ee899582d2fc611158764 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..7353b93e049865d037f56de7e4e4e7ef22ecc017 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..40d6577dec4199160cb23eb63f3805b530c38dd7 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.762, + "avg_latency_seconds": 2.748, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 3.164, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but", + "candidate_id": "cand_05", + "selected_candidate": "cand_05", + "env_reward": 0.792, + "latency_seconds": 2.656, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.641, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.643, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.638, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5e7e4a3ead8458fcf1611ff54bdbc630d0bbdfea --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "model_index": 2, + "sft_epochs": 1, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..bd04c896532f5a5ae0fa8959979709a445323fb4 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2853, + "grad_norm": 1.139764428138733, + "learning_rate": 2e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 3.5581, + "grad_norm": NaN, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8917, + "grad_norm": 1.0447810888290405, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1935, + "grad_norm": 0.8309267163276672, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 3.5163, + "grad_norm": 4.351670742034912, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 3.4885, + "grad_norm": 4.261757850646973, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2711, + "grad_norm": 0.8578795790672302, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8313, + "grad_norm": 0.6491284370422363, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.2098, + "grad_norm": 0.8803694844245911, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 3.3912, + "grad_norm": 3.3331027030944824, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1925, + "grad_norm": 0.6839883327484131, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 3.3481, + "grad_norm": 2.9968008995056152, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8284, + "grad_norm": 0.5385816693305969, + "learning_rate": 1.989e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2033, + "grad_norm": 0.5642092823982239, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2305, + "grad_norm": 0.6205269694328308, + "learning_rate": 1.987e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.1978, + "grad_norm": 0.5339632630348206, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 3.2635, + "grad_norm": 2.3871994018554688, + "learning_rate": 1.985e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.1722, + "grad_norm": 0.5115076303482056, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.234, + "grad_norm": 0.7502650618553162, + "learning_rate": 1.983e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.2009, + "grad_norm": 0.563306450843811, + "learning_rate": 1.982e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 3.2024, + "grad_norm": 2.1435375213623047, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1136, + "grad_norm": 0.4755318760871887, + "learning_rate": 1.98e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.81, + "grad_norm": 0.42654362320899963, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 3.1658, + "grad_norm": 2.022304058074951, + "learning_rate": 1.978e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 3.1525, + "grad_norm": 1.9966037273406982, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.1701, + "grad_norm": 0.43180903792381287, + "learning_rate": 1.976e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1161, + "grad_norm": 0.49122628569602966, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 3.1096, + "grad_norm": 1.9505829811096191, + "learning_rate": 1.974e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.0957, + "grad_norm": 0.4052703380584717, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.1922, + "grad_norm": 0.4599268436431885, + "learning_rate": 1.972e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 3.0661, + "grad_norm": 1.9074920415878296, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 3.0517, + "grad_norm": 1.9043670892715454, + "learning_rate": 1.97e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8217, + "grad_norm": 0.43874070048332214, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.1533, + "grad_norm": 0.4097289741039276, + "learning_rate": 1.968e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 3.0079, + "grad_norm": 1.8589015007019043, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.9929, + "grad_norm": 1.8493101596832275, + "learning_rate": 1.966e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.9771, + "grad_norm": 1.823657751083374, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1322, + "grad_norm": 0.41579654812812805, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0436, + "grad_norm": 0.4191758632659912, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.7707, + "grad_norm": 0.389350026845932, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7557, + "grad_norm": 0.3683435320854187, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.9037, + "grad_norm": 1.7245700359344482, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.8901, + "grad_norm": 1.7086819410324097, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0387, + "grad_norm": 0.40467050671577454, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.0567, + "grad_norm": 0.4369414746761322, + "learning_rate": 1.957e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.1317, + "grad_norm": 0.4135839641094208, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0284, + "grad_norm": 0.3962143063545227, + "learning_rate": 1.955e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.8211, + "grad_norm": 1.6713019609451294, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.751, + "grad_norm": 0.3764272928237915, + "learning_rate": 1.953e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.1035, + "grad_norm": 0.4032706618309021, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.066, + "grad_norm": 0.3904367685317993, + "learning_rate": 1.951e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.7715, + "grad_norm": 1.6729886531829834, + "learning_rate": 1.95e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.7583, + "grad_norm": 1.668998122215271, + "learning_rate": 1.949e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.7429, + "grad_norm": 1.6743063926696777, + "learning_rate": 1.948e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1043, + "grad_norm": 0.41544175148010254, + "learning_rate": 1.947e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.0547, + "grad_norm": 0.4136095345020294, + "learning_rate": 1.946e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.7022, + "grad_norm": 1.6811003684997559, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.685, + "grad_norm": 1.6868253946304321, + "learning_rate": 1.944e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.6703, + "grad_norm": 1.6875874996185303, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.0897, + "grad_norm": 0.3931529223918915, + "learning_rate": 1.942e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0308, + "grad_norm": 0.4257798492908478, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.752, + "grad_norm": 0.3678564429283142, + "learning_rate": 1.94e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 0.995, + "grad_norm": 0.414833128452301, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0055, + "grad_norm": 0.42559435963630676, + "learning_rate": 1.938e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.5807, + "grad_norm": 1.7541372776031494, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.5636, + "grad_norm": 1.7794091701507568, + "learning_rate": 1.936e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.5482, + "grad_norm": 1.7919189929962158, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7033, + "grad_norm": 0.3789256811141968, + "learning_rate": 1.934e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7623, + "grad_norm": 0.41511237621307373, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.5008, + "grad_norm": 1.8457392454147339, + "learning_rate": 1.932e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 0.9835, + "grad_norm": 0.4251658618450165, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.6836, + "grad_norm": 0.39055028557777405, + "learning_rate": 1.93e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.0516, + "grad_norm": 0.4297751784324646, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 0.9707, + "grad_norm": 0.408170223236084, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.0632, + "grad_norm": 0.4372476041316986, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.419, + "grad_norm": 1.9062981605529785, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.4008, + "grad_norm": 1.9403553009033203, + "learning_rate": 1.925e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3866, + "grad_norm": 1.9395607709884644, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3668, + "grad_norm": 1.948604941368103, + "learning_rate": 1.923e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7165, + "grad_norm": 0.3970690369606018, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0087, + "grad_norm": 0.46349093317985535, + "learning_rate": 1.921e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.7138, + "grad_norm": 0.3978181481361389, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.6682, + "grad_norm": 0.38714009523391724, + "learning_rate": 1.919e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2852, + "grad_norm": 1.8964459896087646, + "learning_rate": 1.918e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2692, + "grad_norm": 1.8906216621398926, + "learning_rate": 1.917e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.644444465637207, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.253, + "grad_norm": 1.8771262168884277, + "learning_rate": 1.916e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9113, + "grad_norm": 0.49527081847190857, + "learning_rate": 1.915e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.0366, + "grad_norm": 0.4962358772754669, + "learning_rate": 1.914e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2018, + "grad_norm": 1.8590370416641235, + "learning_rate": 1.913e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 0.9951, + "grad_norm": 0.5745645761489868, + "learning_rate": 1.912e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.6545, + "grad_norm": 0.4285139739513397, + "learning_rate": 1.911e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1565, + "grad_norm": 1.8819890022277832, + "learning_rate": 1.91e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1391, + "grad_norm": 1.9009383916854858, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 0.9592, + "grad_norm": 0.5530417561531067, + "learning_rate": 1.908e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.639, + "grad_norm": 0.4635550081729889, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.0893, + "grad_norm": 1.9755080938339233, + "learning_rate": 1.906e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.0698, + "grad_norm": 2.017965793609619, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.0535, + "grad_norm": 2.0711710453033447, + "learning_rate": 1.904e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0313, + "grad_norm": 2.117086172103882, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6362, + "grad_norm": 0.48415306210517883, + "learning_rate": 1.902e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6335, + "grad_norm": 0.5150465965270996, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 0.9912, + "grad_norm": 0.6076453924179077, + "learning_rate": 1.9e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 0.9828, + "grad_norm": 0.5944868326187134, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.8844, + "grad_norm": 0.5450642704963684, + "learning_rate": 1.898e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9195, + "grad_norm": 0.5619152188301086, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.9053, + "grad_norm": 2.4565858840942383, + "learning_rate": 1.896e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.6608, + "grad_norm": 0.5228564739227295, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6786, + "grad_norm": 0.5397571325302124, + "learning_rate": 1.894e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6198, + "grad_norm": 0.537507176399231, + "learning_rate": 1.893e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.8448, + "grad_norm": 2.565553665161133, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 0.9505, + "grad_norm": 0.5609534978866577, + "learning_rate": 1.891e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6103, + "grad_norm": 0.5393182635307312, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.8089, + "grad_norm": 2.6849920749664307, + "learning_rate": 1.889e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 0.961, + "grad_norm": 0.5978713035583496, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.777, + "grad_norm": 2.7187552452087402, + "learning_rate": 1.887e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.7591, + "grad_norm": 2.7737131118774414, + "learning_rate": 1.886e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.74, + "grad_norm": 2.7507472038269043, + "learning_rate": 1.885e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6336, + "grad_norm": 0.6201249957084656, + "learning_rate": 1.884e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.5845, + "grad_norm": 0.5287116169929504, + "learning_rate": 1.883e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.8665, + "grad_norm": 0.6071702241897583, + "learning_rate": 1.882e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.8748, + "grad_norm": 0.6387258172035217, + "learning_rate": 1.881e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.875, + "grad_norm": 0.5957177877426147, + "learning_rate": 1.88e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.5784, + "grad_norm": 0.5134051442146301, + "learning_rate": 1.879e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.5775, + "grad_norm": 0.5122160911560059, + "learning_rate": 1.878e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.6118, + "grad_norm": 2.893503189086914, + "learning_rate": 1.877e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6218, + "grad_norm": 0.5278106927871704, + "learning_rate": 1.876e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.5808, + "grad_norm": 2.9607582092285156, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.802, + "grad_norm": 0.6248002052307129, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8202, + "grad_norm": 0.6419914364814758, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.8238747715950012, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.534, + "grad_norm": 3.0163865089416504, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.5157, + "grad_norm": 3.01271390914917, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.497, + "grad_norm": 2.959350824356079, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.4734, + "grad_norm": 2.8837082386016846, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8266, + "grad_norm": 0.6843762993812561, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.861, + "grad_norm": 0.7351704835891724, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.845, + "grad_norm": 0.7598766088485718, + "learning_rate": 1.866e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.3777, + "grad_norm": 3.036391496658325, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5412, + "grad_norm": 0.6829193830490112, + "learning_rate": 1.864e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.7666, + "grad_norm": 0.7895976901054382, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5381, + "grad_norm": 0.790127694606781, + "learning_rate": 1.862e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.2811, + "grad_norm": 3.4602015018463135, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.26, + "grad_norm": 3.52811336517334, + "learning_rate": 1.86e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.2314, + "grad_norm": 3.6009700298309326, + "learning_rate": 1.859e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.2002, + "grad_norm": 3.6722474098205566, + "learning_rate": 1.858e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.1693, + "grad_norm": 3.4836974143981934, + "learning_rate": 1.857e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.1338, + "grad_norm": 3.369781017303467, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.0973, + "grad_norm": 3.3117072582244873, + "learning_rate": 1.855e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.8315, + "grad_norm": 0.9976187944412231, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.0272, + "grad_norm": 3.300879955291748, + "learning_rate": 1.853e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 0.9891, + "grad_norm": 3.3772897720336914, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5464, + "grad_norm": 0.9478758573532104, + "learning_rate": 1.851e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8039, + "grad_norm": 1.1654984951019287, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 0.8961, + "grad_norm": 4.251962184906006, + "learning_rate": 1.849e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 0.8656, + "grad_norm": 4.492918491363525, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.493, + "grad_norm": 0.8727006912231445, + "learning_rate": 1.847e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.7707, + "grad_norm": 1.041538119316101, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5714, + "grad_norm": 0.9487267136573792, + "learning_rate": 1.845e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.4725, + "grad_norm": 0.798832356929779, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.7814, + "grad_norm": 0.9986205101013184, + "learning_rate": 1.843e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.7441, + "grad_norm": 0.9336599707603455, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 0.7031, + "grad_norm": 5.16276741027832, + "learning_rate": 1.841e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 0.679, + "grad_norm": 4.1701273918151855, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.7353, + "grad_norm": 1.0674586296081543, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.7491, + "grad_norm": 1.21304452419281, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.6185, + "grad_norm": 4.724250316619873, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.6687, + "grad_norm": 1.0483168363571167, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5248, + "grad_norm": 1.1386994123458862, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.692, + "grad_norm": 1.000663161277771, + "learning_rate": 1.834e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.549, + "grad_norm": 5.925390720367432, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.5316, + "grad_norm": 7.124028205871582, + "learning_rate": 1.832e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.6214, + "grad_norm": 1.0966285467147827, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.482, + "grad_norm": 4.625036239624023, + "learning_rate": 1.83e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.6731, + "grad_norm": 1.3060588836669922, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.5768, + "grad_norm": 1.7968002557754517, + "learning_rate": 1.828e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6029, + "grad_norm": 1.7848604917526245, + "learning_rate": 1.827e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.3979, + "grad_norm": 1.9516690969467163, + "learning_rate": 1.826e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.391, + "grad_norm": 3.8316330909729004, + "learning_rate": 1.825e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.6449, + "grad_norm": 1.5616425275802612, + "learning_rate": 1.824e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.6533, + "grad_norm": 1.280671238899231, + "learning_rate": 1.823e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.3584, + "grad_norm": 6.280538082122803, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.3733, + "grad_norm": 1.0696591138839722, + "learning_rate": 1.821e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.3357, + "grad_norm": 3.6380887031555176, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.3244, + "grad_norm": 3.0167179107666016, + "learning_rate": 1.819e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.5994, + "grad_norm": 1.6260021924972534, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.6215, + "grad_norm": 1.607763409614563, + "learning_rate": 1.817e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.5443, + "grad_norm": 1.351562261581421, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.2865, + "grad_norm": 2.277933120727539, + "learning_rate": 1.815e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.5709, + "grad_norm": 1.3398513793945312, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.2716, + "grad_norm": 3.923830986022949, + "learning_rate": 1.813e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.509, + "grad_norm": 1.4502966403961182, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.4854, + "grad_norm": 1.4078965187072754, + "learning_rate": 1.811e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.2501, + "grad_norm": 3.077928304672241, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.5453, + "grad_norm": 1.7737340927124023, + "learning_rate": 1.809e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.239, + "grad_norm": 2.0369770526885986, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.2344, + "grad_norm": 1.9151840209960938, + "learning_rate": 1.807e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.5325, + "grad_norm": 1.6656997203826904, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.4971, + "grad_norm": 1.9251680374145508, + "learning_rate": 1.805e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.52, + "grad_norm": 1.8106904029846191, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.2154, + "grad_norm": 2.2629575729370117, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.4612, + "grad_norm": 1.7021019458770752, + "learning_rate": 1.802e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4315, + "grad_norm": 2.6399946212768555, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.4603, + "grad_norm": 1.909094214439392, + "learning_rate": 1.8e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.4483, + "grad_norm": 1.7435243129730225, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.4438, + "grad_norm": 2.1652462482452393, + "learning_rate": 1.798e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.4678, + "grad_norm": 2.338404417037964, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3195, + "grad_norm": 1.3209658861160278, + "learning_rate": 1.796e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.4409, + "grad_norm": 1.709653377532959, + "learning_rate": 1.795e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8982387185096741, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.4037, + "grad_norm": 2.7179744243621826, + "learning_rate": 1.794e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.2739, + "grad_norm": 1.0299943685531616, + "learning_rate": 1.793e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.2022, + "grad_norm": 2.607898473739624, + "learning_rate": 1.792e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.2042, + "grad_norm": 2.916175127029419, + "learning_rate": 1.791e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.3787, + "grad_norm": 2.026442527770996, + "learning_rate": 1.79e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.3879, + "grad_norm": 1.7650607824325562, + "learning_rate": 1.789e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.908023476600647, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.1951, + "grad_norm": 3.8692498207092285, + "learning_rate": 1.788e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.1904, + "grad_norm": 3.0922181606292725, + "learning_rate": 1.787e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.301, + "grad_norm": 1.9583574533462524, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.1827, + "grad_norm": 1.9792364835739136, + "learning_rate": 1.785e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.1794, + "grad_norm": 1.3933207988739014, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.3381, + "grad_norm": 1.6843299865722656, + "learning_rate": 1.783e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.1732, + "grad_norm": 1.4762918949127197, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.1689, + "grad_norm": 1.1075265407562256, + "learning_rate": 1.781e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.3562, + "grad_norm": 2.2154247760772705, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.1629, + "grad_norm": 1.3579362630844116, + "learning_rate": 1.779e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.3199, + "grad_norm": 1.9855793714523315, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.3381, + "grad_norm": 1.787819266319275, + "learning_rate": 1.777e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.1525, + "grad_norm": 1.0635879039764404, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.1496, + "grad_norm": 1.0544939041137695, + "learning_rate": 1.775e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.1459, + "grad_norm": 1.147072672843933, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.1426, + "grad_norm": 1.0801589488983154, + "learning_rate": 1.773e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.2557, + "grad_norm": 1.2963556051254272, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.1332, + "grad_norm": 1.3799799680709839, + "learning_rate": 1.771e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.2481, + "grad_norm": 1.1608214378356934, + "learning_rate": 1.77e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.2642, + "grad_norm": 1.2985522747039795, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.3124, + "grad_norm": 2.222142219543457, + "learning_rate": 1.768e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.3102, + "grad_norm": 2.533982753753662, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.1218, + "grad_norm": 1.7190382480621338, + "learning_rate": 1.766e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.1169, + "grad_norm": 1.3357374668121338, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.1147, + "grad_norm": 1.298270344734192, + "learning_rate": 1.764e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.3127, + "grad_norm": 2.2547061443328857, + "learning_rate": 1.763e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.2312, + "grad_norm": 1.7744327783584595, + "learning_rate": 1.762e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.3975, + "grad_norm": 4.527610778808594, + "learning_rate": 1.761e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.3551, + "grad_norm": 3.1718592643737793, + "learning_rate": 1.76e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.1045, + "grad_norm": 1.574190378189087, + "learning_rate": 1.759e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2236, + "grad_norm": 1.4468473196029663, + "learning_rate": 1.758e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.0999, + "grad_norm": 1.4842942953109741, + "learning_rate": 1.757e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.2509, + "grad_norm": 1.7860370874404907, + "learning_rate": 1.756e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.2611, + "grad_norm": 1.6783521175384521, + "learning_rate": 1.755e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.379, + "grad_norm": 2.3508005142211914, + "learning_rate": 1.754e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.0941, + "grad_norm": 2.0986952781677246, + "learning_rate": 1.753e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.0924, + "grad_norm": 1.9180539846420288, + "learning_rate": 1.752e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.0906, + "grad_norm": 1.0870189666748047, + "learning_rate": 1.751e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2357, + "grad_norm": 1.0672377347946167, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.2584, + "grad_norm": 2.204198122024536, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.0862, + "grad_norm": 2.385765552520752, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.2371, + "grad_norm": 1.8736376762390137, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.2442, + "grad_norm": 1.8243354558944702, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.0824, + "grad_norm": 1.8955978155136108, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3363, + "grad_norm": 2.798372507095337, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.0794, + "grad_norm": 1.304677128791809, + "learning_rate": 1.743e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.0773, + "grad_norm": 1.626665711402893, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.1939, + "grad_norm": 1.7440603971481323, + "learning_rate": 1.741e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.2501, + "grad_norm": 1.3810110092163086, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3304, + "grad_norm": 3.183516025543213, + "learning_rate": 1.739e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.2224, + "grad_norm": 2.094963550567627, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.2354, + "grad_norm": 1.3596550226211548, + "learning_rate": 1.737e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.0727, + "grad_norm": 1.5260241031646729, + "learning_rate": 1.736e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.222, + "grad_norm": 1.5992202758789062, + "learning_rate": 1.735e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3177, + "grad_norm": 2.2656893730163574, + "learning_rate": 1.734e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.0713, + "grad_norm": 1.7473493814468384, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2135, + "grad_norm": 1.9787451028823853, + "learning_rate": 1.732e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.1763, + "grad_norm": 1.0072226524353027, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.1957, + "grad_norm": 1.1664408445358276, + "learning_rate": 1.73e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3349, + "grad_norm": 2.7109858989715576, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.0711, + "grad_norm": 2.568545341491699, + "learning_rate": 1.728e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.1836, + "grad_norm": 1.850518822669983, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.0695, + "grad_norm": 2.5018086433410645, + "learning_rate": 1.726e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.1961, + "grad_norm": 0.9769375324249268, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.2135, + "grad_norm": 1.4824577569961548, + "learning_rate": 1.724e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.1623, + "grad_norm": 1.7970157861709595, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.2098, + "grad_norm": 1.702469825744629, + "learning_rate": 1.722e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.0642, + "grad_norm": 1.6492910385131836, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.1893, + "grad_norm": 1.3040688037872314, + "learning_rate": 1.72e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.0638, + "grad_norm": 2.035078287124634, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.0617, + "grad_norm": 1.428052306175232, + "learning_rate": 1.718e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.1591, + "grad_norm": 1.416749119758606, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.1787, + "grad_norm": 1.3673189878463745, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.324, + "grad_norm": 3.40804386138916, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.0582, + "grad_norm": 2.4875428676605225, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.1816, + "grad_norm": 1.6370735168457031, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.0556, + "grad_norm": 2.5525963306427, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.1861, + "grad_norm": 2.1719298362731934, + "learning_rate": 1.711e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.201, + "grad_norm": 1.304052472114563, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.1531, + "grad_norm": 1.5254027843475342, + "learning_rate": 1.709e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.2727, + "grad_norm": 2.922405242919922, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.1459, + "grad_norm": 1.7082411050796509, + "learning_rate": 1.707e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.174, + "grad_norm": 1.3555234670639038, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.1749, + "grad_norm": 0.9526453018188477, + "learning_rate": 1.705e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.1751, + "grad_norm": 1.491074800491333, + "learning_rate": 1.704e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3221, + "grad_norm": 3.0102553367614746, + "learning_rate": 1.703e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.1546, + "grad_norm": 2.2727670669555664, + "learning_rate": 1.702e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.1623, + "grad_norm": 1.1690260171890259, + "learning_rate": 1.701e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.1757, + "grad_norm": 1.3821128606796265, + "learning_rate": 1.7e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.1345, + "grad_norm": 1.1042118072509766, + "learning_rate": 1.699e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.1709, + "grad_norm": 1.283263087272644, + "learning_rate": 1.698e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.1741, + "grad_norm": 1.0933341979980469, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.1479, + "grad_norm": 1.3540836572647095, + "learning_rate": 1.696e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.094, + "grad_norm": 5.643751621246338, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.091, + "grad_norm": 5.622400760650635, + "learning_rate": 1.694e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.1534, + "grad_norm": 0.9459224343299866, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.0764, + "grad_norm": 4.563518047332764, + "learning_rate": 1.692e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.0689, + "grad_norm": 3.9746463298797607, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.1265, + "grad_norm": 1.5034980773925781, + "learning_rate": 1.69e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.055, + "grad_norm": 2.8813798427581787, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.0502, + "grad_norm": 2.0983633995056152, + "learning_rate": 1.688e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.1459, + "grad_norm": 2.4966609477996826, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.1373, + "grad_norm": 1.884824514389038, + "learning_rate": 1.686e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.12, + "grad_norm": 1.6215541362762451, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.0514, + "grad_norm": 3.570695400238037, + "learning_rate": 1.684e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.0503, + "grad_norm": 3.7310097217559814, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.1698, + "grad_norm": 1.3565757274627686, + "learning_rate": 1.682e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.144, + "grad_norm": 1.7988064289093018, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1553, + "grad_norm": 1.199349284172058, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2808, + "grad_norm": 2.2785050868988037, + "learning_rate": 1.679e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.1303, + "grad_norm": 1.4797053337097168, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.1437, + "grad_norm": 1.2159603834152222, + "learning_rate": 1.677e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.1094, + "grad_norm": 1.3378634452819824, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.1107, + "grad_norm": 1.3265125751495361, + "learning_rate": 1.675e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.104, + "grad_norm": 1.0398075580596924, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0508, + "grad_norm": 3.7928128242492676, + "learning_rate": 1.673e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.1141, + "grad_norm": 1.543946385383606, + "learning_rate": 1.672e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2347, + "grad_norm": 3.0478694438934326, + "learning_rate": 1.671e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1568, + "grad_norm": 1.438165307044983, + "learning_rate": 1.67e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0602, + "grad_norm": 4.521894454956055, + "learning_rate": 1.669e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0575, + "grad_norm": 4.285327434539795, + "learning_rate": 1.668e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.1228, + "grad_norm": 1.7977162599563599, + "learning_rate": 1.667e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0498, + "grad_norm": 3.2977139949798584, + "learning_rate": 1.666e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1072, + "grad_norm": 1.0961717367172241, + "learning_rate": 1.665e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.0888, + "grad_norm": 1.2719725370407104, + "learning_rate": 1.664e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.1016, + "grad_norm": 1.7138031721115112, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.0775, + "grad_norm": 1.2170872688293457, + "learning_rate": 1.662e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0415, + "grad_norm": 2.3039064407348633, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0407, + "grad_norm": 2.1441495418548584, + "learning_rate": 1.66e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 1.0, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0378, + "grad_norm": 1.570320725440979, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0358, + "grad_norm": 1.359679937362671, + "learning_rate": 1.658e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1491, + "grad_norm": 1.4656238555908203, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.093, + "grad_norm": 1.550439715385437, + "learning_rate": 1.656e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.1191, + "grad_norm": 1.6594032049179077, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1667, + "grad_norm": 1.6316683292388916, + "learning_rate": 1.654e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.1172, + "grad_norm": 1.1592111587524414, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0288, + "grad_norm": 1.2376233339309692, + "learning_rate": 1.652e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 1.0, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0279, + "grad_norm": 1.1726553440093994, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.158, + "grad_norm": 1.639247179031372, + "learning_rate": 1.65e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0254, + "grad_norm": 0.882344126701355, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0236, + "grad_norm": 0.7603262066841125, + "learning_rate": 1.648e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0231, + "grad_norm": 1.0259835720062256, + "learning_rate": 1.647e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1341, + "grad_norm": 1.3803941011428833, + "learning_rate": 1.646e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.26, + "grad_norm": 2.67657208442688, + "learning_rate": 1.645e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.0787, + "grad_norm": 1.1956502199172974, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0184, + "grad_norm": 1.0563417673110962, + "learning_rate": 1.643e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.2769, + "grad_norm": 3.5824198722839355, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.017, + "grad_norm": 0.9444816708564758, + "learning_rate": 1.641e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1499, + "grad_norm": 1.6610344648361206, + "learning_rate": 1.64e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0159, + "grad_norm": 1.3713178634643555, + "learning_rate": 1.639e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0142, + "grad_norm": 0.7958543300628662, + "learning_rate": 1.638e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0136, + "grad_norm": 0.7060168385505676, + "learning_rate": 1.637e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0126, + "grad_norm": 0.6885517239570618, + "learning_rate": 1.636e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.1437, + "grad_norm": 1.7837411165237427, + "learning_rate": 1.635e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.1352, + "grad_norm": 1.0794353485107422, + "learning_rate": 1.634e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1036, + "grad_norm": 1.2649973630905151, + "learning_rate": 1.633e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.082, + "grad_norm": 1.4123811721801758, + "learning_rate": 1.632e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2251, + "grad_norm": 2.3190250396728516, + "learning_rate": 1.631e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0101, + "grad_norm": 1.145607590675354, + "learning_rate": 1.63e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.01, + "grad_norm": 1.1430310010910034, + "learning_rate": 1.629e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1157, + "grad_norm": 1.080237865447998, + "learning_rate": 1.628e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0094, + "grad_norm": 0.8564168810844421, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.009, + "grad_norm": 0.6895986199378967, + "learning_rate": 1.626e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0088, + "grad_norm": 0.7237755656242371, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0081, + "grad_norm": 0.7111520767211914, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.2266, + "grad_norm": 3.2268872261047363, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.1096, + "grad_norm": 1.5681886672973633, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1323, + "grad_norm": 1.1309343576431274, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0065, + "grad_norm": 0.4017643630504608, + "learning_rate": 1.62e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.0901, + "grad_norm": 1.3869181871414185, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.135, + "grad_norm": 1.0720597505569458, + "learning_rate": 1.618e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2196, + "grad_norm": 2.46571683883667, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.1479, + "grad_norm": 1.4283263683319092, + "learning_rate": 1.616e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1442, + "grad_norm": 1.0318039655685425, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.119, + "grad_norm": 0.9293051958084106, + "learning_rate": 1.614e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0122, + "grad_norm": 2.9073522090911865, + "learning_rate": 1.613e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0156, + "grad_norm": 3.24949049949646, + "learning_rate": 1.612e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2428, + "grad_norm": 2.2780046463012695, + "learning_rate": 1.611e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0158, + "grad_norm": 2.8313698768615723, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.073, + "grad_norm": 1.1441925764083862, + "learning_rate": 1.609e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.0713, + "grad_norm": 1.0356674194335938, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1163, + "grad_norm": 0.9958234429359436, + "learning_rate": 1.607e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.065, + "grad_norm": 1.0690953731536865, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0143, + "grad_norm": 2.4794986248016357, + "learning_rate": 1.605e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1213, + "grad_norm": 1.1662561893463135, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0133, + "grad_norm": 2.1572377681732178, + "learning_rate": 1.603e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2415, + "grad_norm": 2.1097450256347656, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.2415, + "grad_norm": 1.9146851301193237, + "learning_rate": 1.601e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.0792, + "grad_norm": 1.4688655138015747, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1037, + "grad_norm": 1.3678481578826904, + "learning_rate": 1.599e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.0645, + "grad_norm": 1.394155740737915, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1221, + "grad_norm": 1.3450697660446167, + "learning_rate": 1.597e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0111, + "grad_norm": 1.5307925939559937, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.0111, + "grad_norm": 1.5876197814941406, + "learning_rate": 1.595e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.1193, + "grad_norm": 1.4841184616088867, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.1328, + "grad_norm": 1.1095598936080933, + "learning_rate": 1.593e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0093, + "grad_norm": 1.4608124494552612, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1107, + "grad_norm": 1.4897429943084717, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.1984, + "grad_norm": 2.675309419631958, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0076, + "grad_norm": 1.1623023748397827, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0063, + "grad_norm": 0.732515275478363, + "learning_rate": 1.588e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1286, + "grad_norm": 1.144338846206665, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.1896, + "grad_norm": 2.561152219772339, + "learning_rate": 1.586e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.1736, + "grad_norm": 2.7632133960723877, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0056, + "grad_norm": 0.5383828282356262, + "learning_rate": 1.584e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0053, + "grad_norm": 0.5213011503219604, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.1293, + "grad_norm": 1.3833296298980713, + "learning_rate": 1.582e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0047, + "grad_norm": 0.35407668352127075, + "learning_rate": 1.581e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1152, + "grad_norm": 1.2960784435272217, + "learning_rate": 1.58e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.0701, + "grad_norm": 1.1170578002929688, + "learning_rate": 1.579e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1111, + "grad_norm": 1.0579668283462524, + "learning_rate": 1.578e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0048, + "grad_norm": 0.4491373300552368, + "learning_rate": 1.577e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0052, + "grad_norm": 0.5798842906951904, + "learning_rate": 1.576e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0053, + "grad_norm": 0.6644476056098938, + "learning_rate": 1.575e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1002, + "grad_norm": 1.4146150350570679, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0049, + "grad_norm": 0.5174235701560974, + "learning_rate": 1.573e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1005, + "grad_norm": 1.295534610748291, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.0997, + "grad_norm": 1.874627947807312, + "learning_rate": 1.571e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0048, + "grad_norm": 0.477443128824234, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0048, + "grad_norm": 0.5091577172279358, + "learning_rate": 1.569e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0045, + "grad_norm": 0.42573752999305725, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1289, + "grad_norm": 1.2042423486709595, + "learning_rate": 1.567e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.0741, + "grad_norm": 1.1629348993301392, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.004, + "grad_norm": 0.3303038775920868, + "learning_rate": 1.565e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0039, + "grad_norm": 0.279052734375, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1122, + "grad_norm": 1.5259605646133423, + "learning_rate": 1.563e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1174, + "grad_norm": 1.2986260652542114, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.0041, + "grad_norm": 0.4193200170993805, + "learning_rate": 1.561e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1207, + "grad_norm": 1.2413984537124634, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0045, + "grad_norm": 0.6368035078048706, + "learning_rate": 1.559e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.101, + "grad_norm": 1.2425626516342163, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1124, + "grad_norm": 1.019707202911377, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0051, + "grad_norm": 0.8345929384231567, + "learning_rate": 1.556e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0052, + "grad_norm": 0.8587450385093689, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1214, + "grad_norm": 1.1086853742599487, + "learning_rate": 1.554e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1164, + "grad_norm": 1.238479495048523, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1249, + "grad_norm": 1.3684537410736084, + "learning_rate": 1.552e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0054, + "grad_norm": 0.947119951248169, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0056, + "grad_norm": 0.9146615266799927, + "learning_rate": 1.55e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.0782, + "grad_norm": 1.2344416379928589, + "learning_rate": 1.549e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.4506, + "grad_norm": 7.777007579803467, + "learning_rate": 1.548e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.0639, + "grad_norm": 1.501968264579773, + "learning_rate": 1.547e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0046, + "grad_norm": 0.6376725435256958, + "learning_rate": 1.546e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0043, + "grad_norm": 0.5955199003219604, + "learning_rate": 1.545e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1027, + "grad_norm": 1.514914631843567, + "learning_rate": 1.544e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1145, + "grad_norm": 1.1080951690673828, + "learning_rate": 1.543e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.1661, + "grad_norm": 2.103287696838379, + "learning_rate": 1.542e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0041, + "grad_norm": 0.5920866131782532, + "learning_rate": 1.541e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.0831, + "grad_norm": 1.2727563381195068, + "learning_rate": 1.54e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.076, + "grad_norm": 1.3624043464660645, + "learning_rate": 1.539e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0051, + "grad_norm": 1.0213030576705933, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0053, + "grad_norm": 1.1751487255096436, + "learning_rate": 1.537e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1073, + "grad_norm": 1.1450884342193604, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1152, + "grad_norm": 1.0188744068145752, + "learning_rate": 1.535e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0042, + "grad_norm": 0.6943671703338623, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.0041, + "grad_norm": 0.5702145099639893, + "learning_rate": 1.533e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1601, + "grad_norm": 2.467028856277466, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0036, + "grad_norm": 0.3947738707065582, + "learning_rate": 1.531e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0035, + "grad_norm": 0.3578404486179352, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1018, + "grad_norm": 1.5206029415130615, + "learning_rate": 1.529e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.0753, + "grad_norm": 1.400350570678711, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0032, + "grad_norm": 0.33458250761032104, + "learning_rate": 1.527e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0029, + "grad_norm": 0.2822412848472595, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0029, + "grad_norm": 0.24599352478981018, + "learning_rate": 1.525e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.0772, + "grad_norm": 1.2155442237854004, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0028, + "grad_norm": 0.2298114001750946, + "learning_rate": 1.523e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0027, + "grad_norm": 0.23676389455795288, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.0027, + "grad_norm": 0.21022361516952515, + "learning_rate": 1.521e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1104, + "grad_norm": 1.7568659782409668, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0027, + "grad_norm": 0.28411486744880676, + "learning_rate": 1.519e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0028, + "grad_norm": 0.2967180907726288, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0026, + "grad_norm": 0.31251031160354614, + "learning_rate": 1.517e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.0629, + "grad_norm": 1.4641610383987427, + "learning_rate": 1.516e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.0024, + "grad_norm": 0.22654157876968384, + "learning_rate": 1.515e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.063, + "grad_norm": 1.187050223350525, + "learning_rate": 1.514e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.0565, + "grad_norm": 1.331944227218628, + "learning_rate": 1.513e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0026, + "grad_norm": 0.37733522057533264, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.0989, + "grad_norm": 1.4206980466842651, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0028, + "grad_norm": 0.3664330244064331, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.003, + "grad_norm": 0.5825914740562439, + "learning_rate": 1.509e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.47541120648384094, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1152, + "grad_norm": 1.194077730178833, + "learning_rate": 1.507e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.0642, + "grad_norm": 1.5998581647872925, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0031, + "grad_norm": 0.45395979285240173, + "learning_rate": 1.505e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.066, + "grad_norm": 1.4924191236495972, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0642, + "grad_norm": 1.4406323432922363, + "learning_rate": 1.503e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.004, + "grad_norm": 0.7274853587150574, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0637, + "grad_norm": 1.4921272993087769, + "learning_rate": 1.501e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0881, + "grad_norm": 1.3289899826049805, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0046, + "grad_norm": 0.9299827814102173, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.0917, + "grad_norm": 1.0895007848739624, + "learning_rate": 1.498e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0055, + "grad_norm": 1.2428455352783203, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.0904, + "grad_norm": 1.1731876134872437, + "learning_rate": 1.496e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0042, + "grad_norm": 0.8642317652702332, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.0042, + "grad_norm": 0.9150028228759766, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1244, + "grad_norm": 1.520849585533142, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0667, + "grad_norm": 1.3897782564163208, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.4630263149738312, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0026, + "grad_norm": 0.32279714941978455, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1723, + "grad_norm": 2.5587806701660156, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.084, + "grad_norm": 1.5307081937789917, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0455, + "grad_norm": 1.2075250148773193, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.3137587904930115, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1133, + "grad_norm": 1.3542101383209229, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0025, + "grad_norm": 0.3963753581047058, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1022, + "grad_norm": 1.4186869859695435, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0029, + "grad_norm": 0.533608615398407, + "learning_rate": 1.482e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0842, + "grad_norm": 1.5056371688842773, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0033, + "grad_norm": 0.6577285528182983, + "learning_rate": 1.48e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1089, + "grad_norm": 1.4338765144348145, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1055, + "grad_norm": 1.13351571559906, + "learning_rate": 1.478e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.0951, + "grad_norm": 1.237243413925171, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.212, + "grad_norm": 3.4371607303619385, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.0058, + "grad_norm": 1.4969244003295898, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0068, + "grad_norm": 1.7211462259292603, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.0986, + "grad_norm": 0.948099672794342, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0057, + "grad_norm": 1.391058325767517, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0042, + "grad_norm": 0.9918210506439209, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.2042, + "grad_norm": 2.672642230987549, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.003, + "grad_norm": 0.45506858825683594, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0797, + "grad_norm": 1.4114668369293213, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0027, + "grad_norm": 0.5301483869552612, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0668, + "grad_norm": 1.3311203718185425, + "learning_rate": 1.466e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.0022, + "grad_norm": 0.2691483795642853, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.1992, + "grad_norm": 1.9987740516662598, + "learning_rate": 1.464e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1435, + "grad_norm": 2.9904839992523193, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.4652901887893677, + "learning_rate": 1.462e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0022, + "grad_norm": 0.30126360058784485, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0023, + "grad_norm": 0.28965601325035095, + "learning_rate": 1.46e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0022, + "grad_norm": 0.23019753396511078, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0022, + "grad_norm": 0.21258652210235596, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0748, + "grad_norm": 1.3212836980819702, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.002, + "grad_norm": 0.15865401923656464, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.002, + "grad_norm": 0.18746234476566315, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0684, + "grad_norm": 1.4932857751846313, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0021, + "grad_norm": 0.23370607197284698, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0765, + "grad_norm": 1.3977128267288208, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.0999, + "grad_norm": 1.421388030052185, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.41459253430366516, + "learning_rate": 1.45e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0026, + "grad_norm": 0.4490201473236084, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0918, + "grad_norm": 1.3046605587005615, + "learning_rate": 1.448e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0864, + "grad_norm": 1.233083963394165, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0032, + "grad_norm": 0.6014226078987122, + "learning_rate": 1.446e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1619, + "grad_norm": 2.670433759689331, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0034, + "grad_norm": 0.6123008131980896, + "learning_rate": 1.444e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1146, + "grad_norm": 1.6403765678405762, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1593, + "grad_norm": 2.7106077671051025, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0035, + "grad_norm": 0.693053126335144, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.06, + "grad_norm": 4.2686448097229, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.0764, + "grad_norm": 1.4215189218521118, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0037, + "grad_norm": 0.7100173234939575, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1991, + "grad_norm": 2.5193188190460205, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.0711, + "grad_norm": 1.3730517625808716, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.0891, + "grad_norm": 1.397972583770752, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0928, + "grad_norm": 1.5409183502197266, + "learning_rate": 1.434e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.0893, + "grad_norm": 1.1101114749908447, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0055, + "grad_norm": 1.2417343854904175, + "learning_rate": 1.432e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0829, + "grad_norm": 1.277969479560852, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.0892, + "grad_norm": 1.385054349899292, + "learning_rate": 1.43e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.0074, + "grad_norm": 1.8123408555984497, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0575, + "grad_norm": 1.3045315742492676, + "learning_rate": 1.428e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.1662, + "grad_norm": 2.5381715297698975, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.0067, + "grad_norm": 1.5872633457183838, + "learning_rate": 1.426e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0061, + "grad_norm": 1.5367522239685059, + "learning_rate": 1.425e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0052, + "grad_norm": 1.1771265268325806, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0035, + "grad_norm": 0.596717119216919, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0027, + "grad_norm": 0.3555561900138855, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0022, + "grad_norm": 0.31791797280311584, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.1456, + "grad_norm": 3.0790412425994873, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.0915, + "grad_norm": 1.610164761543274, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.0019, + "grad_norm": 0.35682275891304016, + "learning_rate": 1.418e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0758, + "grad_norm": 1.1877442598342896, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.0018, + "grad_norm": 0.3156123459339142, + "learning_rate": 1.416e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0017, + "grad_norm": 0.25764769315719604, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1041, + "grad_norm": 1.8042068481445312, + "learning_rate": 1.414e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1758, + "grad_norm": 2.5269131660461426, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0016, + "grad_norm": 0.12714117765426636, + "learning_rate": 1.412e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0016, + "grad_norm": 0.13591638207435608, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.0943, + "grad_norm": 1.4506866931915283, + "learning_rate": 1.41e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0017, + "grad_norm": 0.17016956210136414, + "learning_rate": 1.409e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0715, + "grad_norm": 1.1805306673049927, + "learning_rate": 1.408e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0831, + "grad_norm": 1.2475357055664062, + "learning_rate": 1.407e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.35699722170829773, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0721, + "grad_norm": 1.1971431970596313, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.066, + "grad_norm": 1.1251575946807861, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0027, + "grad_norm": 0.5506196618080139, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1048, + "grad_norm": 1.8220717906951904, + "learning_rate": 1.402e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.0037, + "grad_norm": 0.8545289039611816, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0037, + "grad_norm": 0.8475953936576843, + "learning_rate": 1.4e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.0967, + "grad_norm": 1.2703156471252441, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.098, + "grad_norm": 1.2548829317092896, + "learning_rate": 1.398e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.0924, + "grad_norm": 1.2570987939834595, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0609, + "grad_norm": 1.531058669090271, + "learning_rate": 1.396e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1424, + "grad_norm": 2.5060534477233887, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0048, + "grad_norm": 1.0655303001403809, + "learning_rate": 1.394e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0593, + "grad_norm": 1.0243408679962158, + "learning_rate": 1.393e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0905, + "grad_norm": 1.3182287216186523, + "learning_rate": 1.392e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0068, + "grad_norm": 1.4663218259811401, + "learning_rate": 1.391e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0057, + "grad_norm": 1.2375314235687256, + "learning_rate": 1.39e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0489, + "grad_norm": 1.071290135383606, + "learning_rate": 1.389e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0743, + "grad_norm": 1.0402666330337524, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.1041, + "grad_norm": 2.195901870727539, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0038, + "grad_norm": 0.7095027565956116, + "learning_rate": 1.386e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0804, + "grad_norm": 1.4653010368347168, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0038, + "grad_norm": 0.7164344191551208, + "learning_rate": 1.384e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.1019, + "grad_norm": 1.508054494857788, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0031, + "grad_norm": 0.4974660575389862, + "learning_rate": 1.382e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0031, + "grad_norm": 0.4921479821205139, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0614, + "grad_norm": 1.180677056312561, + "learning_rate": 1.38e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0843, + "grad_norm": 1.1165193319320679, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0816, + "grad_norm": 1.4082179069519043, + "learning_rate": 1.378e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.0893, + "grad_norm": 1.1407965421676636, + "learning_rate": 1.377e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0029, + "grad_norm": 0.47326186299324036, + "learning_rate": 1.376e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.48467254638671875, + "learning_rate": 1.375e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3466941714286804, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0028, + "grad_norm": 0.383543461561203, + "learning_rate": 1.373e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0027, + "grad_norm": 0.3878021240234375, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0699, + "grad_norm": 1.2407838106155396, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0956, + "grad_norm": 1.2576494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0022, + "grad_norm": 0.25685280561447144, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0022, + "grad_norm": 0.2545858323574066, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0023, + "grad_norm": 0.2819485366344452, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0858, + "grad_norm": 1.0897297859191895, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0021, + "grad_norm": 0.325777530670166, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0021, + "grad_norm": 0.29383793473243713, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0944, + "grad_norm": 1.389978289604187, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0962, + "grad_norm": 1.3364863395690918, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0019, + "grad_norm": 0.23381884396076202, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.058, + "grad_norm": 1.5767658948898315, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.002, + "grad_norm": 0.288552463054657, + "learning_rate": 1.359e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0894, + "grad_norm": 1.6633201837539673, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0829, + "grad_norm": 1.4220677614212036, + "learning_rate": 1.357e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0845, + "grad_norm": 1.3433754444122314, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.0917, + "grad_norm": 1.295201063156128, + "learning_rate": 1.355e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.0891, + "grad_norm": 1.3927174806594849, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.006, + "grad_norm": 1.4622353315353394, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0481, + "grad_norm": 1.178935170173645, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0075, + "grad_norm": 1.825118064880371, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.0065, + "grad_norm": 1.5563267469406128, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0059, + "grad_norm": 1.4133291244506836, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0753, + "grad_norm": 1.4185911417007446, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.087, + "grad_norm": 1.3738617897033691, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0702, + "grad_norm": 1.0876400470733643, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0031, + "grad_norm": 0.587776243686676, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.057, + "grad_norm": 1.4529519081115723, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0596, + "grad_norm": 1.0564322471618652, + "learning_rate": 1.343e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0795, + "grad_norm": 1.359084129333496, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0602, + "grad_norm": 1.625110387802124, + "learning_rate": 1.341e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.1519, + "grad_norm": 2.79744291305542, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1522, + "grad_norm": 2.5003347396850586, + "learning_rate": 1.339e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0591, + "grad_norm": 1.2735769748687744, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0603, + "grad_norm": 1.4963431358337402, + "learning_rate": 1.337e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.008, + "grad_norm": 1.6320358514785767, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0885, + "grad_norm": 1.660543441772461, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.059, + "grad_norm": 1.6638036966323853, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0092, + "grad_norm": 1.7701940536499023, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0717, + "grad_norm": 1.6387797594070435, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0795, + "grad_norm": 1.6651279926300049, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0811, + "grad_norm": 1.6673662662506104, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.1082, + "grad_norm": 2.1547534465789795, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0724, + "grad_norm": 1.5310810804367065, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1319, + "grad_norm": 3.544659376144409, + "learning_rate": 1.327e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0668, + "grad_norm": 1.4902386665344238, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0099, + "grad_norm": 1.8921332359313965, + "learning_rate": 1.325e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0093, + "grad_norm": 1.8240478038787842, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0727, + "grad_norm": 1.3348301649093628, + "learning_rate": 1.323e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.082, + "grad_norm": 1.235790491104126, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0743, + "grad_norm": 1.6094404458999634, + "learning_rate": 1.321e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0079, + "grad_norm": 1.5763838291168213, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0882, + "grad_norm": 1.602766513824463, + "learning_rate": 1.319e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0654, + "grad_norm": 1.5263670682907104, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0678, + "grad_norm": 1.2824158668518066, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1246, + "grad_norm": 2.722593307495117, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0428, + "grad_norm": 1.1944324970245361, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0643, + "grad_norm": 1.0645701885223389, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.0061, + "grad_norm": 1.2870023250579834, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0055, + "grad_norm": 1.1952035427093506, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0621, + "grad_norm": 1.063179850578308, + "learning_rate": 1.311e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0047, + "grad_norm": 0.9894086122512817, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0763, + "grad_norm": 1.4259341955184937, + "learning_rate": 1.309e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0642, + "grad_norm": 1.2943477630615234, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.079, + "grad_norm": 1.5152034759521484, + "learning_rate": 1.307e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0717, + "grad_norm": 1.1957803964614868, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.0599, + "grad_norm": 1.4417110681533813, + "learning_rate": 1.305e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0654, + "grad_norm": 1.5242059230804443, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0365, + "grad_norm": 1.1553280353546143, + "learning_rate": 1.303e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0045, + "grad_norm": 0.8679006695747375, + "learning_rate": 1.302e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0782, + "grad_norm": 1.3552151918411255, + "learning_rate": 1.301e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0777, + "grad_norm": 1.6802747249603271, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0895, + "grad_norm": 2.0004899501800537, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0065, + "grad_norm": 1.2331161499023438, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0926, + "grad_norm": 1.814571738243103, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0447, + "grad_norm": 1.2055951356887817, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.1061, + "grad_norm": 1.93771493434906, + "learning_rate": 1.295e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0071, + "grad_norm": 1.3096961975097656, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0809, + "grad_norm": 1.462066650390625, + "learning_rate": 1.293e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0696, + "grad_norm": 1.6013977527618408, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.0067, + "grad_norm": 1.247151494026184, + "learning_rate": 1.291e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0822, + "grad_norm": 1.3341907262802124, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1516, + "grad_norm": 2.655081033706665, + "learning_rate": 1.289e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0628, + "grad_norm": 1.1444809436798096, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.0731, + "grad_norm": 1.465855598449707, + "learning_rate": 1.287e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0057, + "grad_norm": 1.112541913986206, + "learning_rate": 1.286e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.1399, + "grad_norm": 3.088876485824585, + "learning_rate": 1.285e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0759, + "grad_norm": 1.2233434915542603, + "learning_rate": 1.284e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0557, + "grad_norm": 1.2852802276611328, + "learning_rate": 1.283e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.005, + "grad_norm": 1.0076061487197876, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0549, + "grad_norm": 1.230972409248352, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.004, + "grad_norm": 0.7870916724205017, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0034, + "grad_norm": 0.6174665093421936, + "learning_rate": 1.279e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.5346, + "grad_norm": 9.506900787353516, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0627, + "grad_norm": 1.454014539718628, + "learning_rate": 1.277e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3459113836288452, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0775, + "grad_norm": 1.3046914339065552, + "learning_rate": 1.275e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0528, + "grad_norm": 1.3675225973129272, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0629, + "grad_norm": 1.5410852432250977, + "learning_rate": 1.273e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0579, + "grad_norm": 1.2241291999816895, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0023, + "grad_norm": 0.32806485891342163, + "learning_rate": 1.271e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0024, + "grad_norm": 0.3713594675064087, + "learning_rate": 1.27e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0024, + "grad_norm": 0.383628249168396, + "learning_rate": 1.269e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0565, + "grad_norm": 1.4605262279510498, + "learning_rate": 1.268e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0907, + "grad_norm": 2.0260767936706543, + "learning_rate": 1.267e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1355, + "grad_norm": 2.7483110427856445, + "learning_rate": 1.266e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0028, + "grad_norm": 0.5287377834320068, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5259289145469666, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0029, + "grad_norm": 0.5197233557701111, + "learning_rate": 1.263e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0779, + "grad_norm": 1.9638550281524658, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0022, + "grad_norm": 0.34271013736724854, + "learning_rate": 1.261e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.31841135025024414, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.0021, + "grad_norm": 0.28541284799575806, + "learning_rate": 1.259e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.0765, + "grad_norm": 1.1577314138412476, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0018, + "grad_norm": 0.2100057303905487, + "learning_rate": 1.257e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.19263769686222076, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.0813, + "grad_norm": 1.540268898010254, + "learning_rate": 1.255e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0705, + "grad_norm": 1.2791322469711304, + "learning_rate": 1.254e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.3907, + "grad_norm": 7.0182013511657715, + "learning_rate": 1.253e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.19119806587696075, + "learning_rate": 1.252e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.18740034103393555, + "learning_rate": 1.251e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0797, + "grad_norm": 1.8779743909835815, + "learning_rate": 1.25e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0018, + "grad_norm": 0.1861187219619751, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.17008422315120697, + "learning_rate": 1.248e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0018, + "grad_norm": 0.2042454481124878, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.083, + "grad_norm": 1.2712551355361938, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0019, + "grad_norm": 0.22894388437271118, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0632, + "grad_norm": 1.2945611476898193, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0018, + "grad_norm": 0.21884307265281677, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0018, + "grad_norm": 0.22480158507823944, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0019, + "grad_norm": 0.24674543738365173, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0795, + "grad_norm": 2.106468677520752, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0018, + "grad_norm": 0.2204350233078003, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0737, + "grad_norm": 1.4242573976516724, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0878, + "grad_norm": 1.518812656402588, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0633, + "grad_norm": 1.0321228504180908, + "learning_rate": 1.236e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0756, + "grad_norm": 1.1949939727783203, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0024, + "grad_norm": 0.4306935966014862, + "learning_rate": 1.234e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0627, + "grad_norm": 1.1531753540039062, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.003, + "grad_norm": 0.6374348998069763, + "learning_rate": 1.232e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0036, + "grad_norm": 0.7683020234107971, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1434, + "grad_norm": 2.3946049213409424, + "learning_rate": 1.23e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.0032, + "grad_norm": 0.6773089170455933, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.003, + "grad_norm": 0.5508646368980408, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0517, + "grad_norm": 1.0663422346115112, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0598, + "grad_norm": 1.1945189237594604, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0024, + "grad_norm": 0.3890499174594879, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0023, + "grad_norm": 0.3637482821941376, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0022, + "grad_norm": 0.3558770716190338, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.0698, + "grad_norm": 1.282705545425415, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0753, + "grad_norm": 1.923362374305725, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.0769, + "grad_norm": 1.28227961063385, + "learning_rate": 1.22e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0019, + "grad_norm": 0.26410141587257385, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0802, + "grad_norm": 1.2387802600860596, + "learning_rate": 1.218e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.002, + "grad_norm": 0.3023037612438202, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0547, + "grad_norm": 1.3596991300582886, + "learning_rate": 1.216e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0725, + "grad_norm": 1.2279936075210571, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0603, + "grad_norm": 1.4540890455245972, + "learning_rate": 1.214e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0026, + "grad_norm": 0.48957788944244385, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0771, + "grad_norm": 1.2322392463684082, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0434, + "grad_norm": 1.224611759185791, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.7317530512809753, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0038, + "grad_norm": 0.7885755300521851, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0692, + "grad_norm": 1.2012921571731567, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.0036, + "grad_norm": 0.8018218874931335, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0451, + "grad_norm": 1.2235223054885864, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0474, + "grad_norm": 1.2205861806869507, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.0032, + "grad_norm": 0.7037767767906189, + "learning_rate": 1.204e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0518, + "grad_norm": 1.4091877937316895, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0691, + "grad_norm": 1.106124758720398, + "learning_rate": 1.202e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0034, + "grad_norm": 0.7851144075393677, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0032, + "grad_norm": 0.7951046824455261, + "learning_rate": 1.2e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0831, + "grad_norm": 1.5029832124710083, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0026, + "grad_norm": 0.5559270977973938, + "learning_rate": 1.198e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0022, + "grad_norm": 0.4153921902179718, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.0021, + "grad_norm": 0.37202781438827515, + "learning_rate": 1.196e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0529, + "grad_norm": 1.0388691425323486, + "learning_rate": 1.195e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0017, + "grad_norm": 0.22652830183506012, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0645, + "grad_norm": 1.505333423614502, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0865, + "grad_norm": 1.883539080619812, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0015, + "grad_norm": 0.16957923769950867, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0015, + "grad_norm": 0.19717897474765778, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0014, + "grad_norm": 0.1534471958875656, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0494, + "grad_norm": 1.1535961627960205, + "learning_rate": 1.188e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0014, + "grad_norm": 0.1624767929315567, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0015, + "grad_norm": 0.17362011969089508, + "learning_rate": 1.186e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0775, + "grad_norm": 1.9903476238250732, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1399, + "grad_norm": 3.302823781967163, + "learning_rate": 1.184e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0673, + "grad_norm": 1.326196312904358, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0015, + "grad_norm": 0.18564815819263458, + "learning_rate": 1.182e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0548, + "grad_norm": 1.438742756843567, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0017, + "grad_norm": 0.23712487518787384, + "learning_rate": 1.18e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0018, + "grad_norm": 0.27533257007598877, + "learning_rate": 1.179e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0018, + "grad_norm": 0.2764306366443634, + "learning_rate": 1.178e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0513, + "grad_norm": 1.2485377788543701, + "learning_rate": 1.177e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.143, + "grad_norm": 2.3260533809661865, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0865, + "grad_norm": 2.006594181060791, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0728, + "grad_norm": 1.229394793510437, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0727, + "grad_norm": 1.264754295349121, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0624, + "grad_norm": 1.1297813653945923, + "learning_rate": 1.172e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0657, + "grad_norm": 1.348644495010376, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.4017, + "grad_norm": 7.936118125915527, + "learning_rate": 1.17e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0509, + "grad_norm": 2.504011392593384, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0071, + "grad_norm": 1.4856328964233398, + "learning_rate": 1.168e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0065, + "grad_norm": 1.3074718713760376, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.0064, + "grad_norm": 1.328763484954834, + "learning_rate": 1.166e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 1.255282998085022, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1362, + "grad_norm": 1.9963600635528564, + "learning_rate": 1.164e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0042, + "grad_norm": 0.8505628108978271, + "learning_rate": 1.163e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0554, + "grad_norm": 1.5559666156768799, + "learning_rate": 1.162e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0029, + "grad_norm": 0.528516411781311, + "learning_rate": 1.161e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0025, + "grad_norm": 0.40555793046951294, + "learning_rate": 1.16e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0021, + "grad_norm": 0.3407900333404541, + "learning_rate": 1.159e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0726, + "grad_norm": 1.2919087409973145, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.4289, + "grad_norm": 6.98607063293457, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0511, + "grad_norm": 1.4350818395614624, + "learning_rate": 1.156e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0519, + "grad_norm": 1.400582194328308, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0017, + "grad_norm": 0.31648895144462585, + "learning_rate": 1.154e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0018, + "grad_norm": 0.3369519114494324, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0572, + "grad_norm": 1.1995043754577637, + "learning_rate": 1.152e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0742, + "grad_norm": 0.9991039633750916, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0501, + "grad_norm": 1.4309474229812622, + "learning_rate": 1.15e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.1276, + "grad_norm": 2.5142507553100586, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0023, + "grad_norm": 0.4930354058742523, + "learning_rate": 1.148e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.09, + "grad_norm": 1.8823350667953491, + "learning_rate": 1.147e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0517, + "grad_norm": 1.3514404296875, + "learning_rate": 1.146e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0023, + "grad_norm": 0.39818212389945984, + "learning_rate": 1.145e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.0026, + "grad_norm": 0.4840705394744873, + "learning_rate": 1.144e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0551, + "grad_norm": 0.9981673955917358, + "learning_rate": 1.143e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0025, + "grad_norm": 0.43263715505599976, + "learning_rate": 1.142e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1179, + "grad_norm": 2.982013463973999, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0691, + "grad_norm": 0.9637575745582581, + "learning_rate": 1.14e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0764, + "grad_norm": 1.1376231908798218, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0474, + "grad_norm": 0.9938456416130066, + "learning_rate": 1.138e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0036, + "grad_norm": 0.6827121376991272, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.069, + "grad_norm": 1.1721850633621216, + "learning_rate": 1.136e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0742, + "grad_norm": 1.3182216882705688, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0619, + "grad_norm": 1.405136227607727, + "learning_rate": 1.134e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0053, + "grad_norm": 1.0143218040466309, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0822, + "grad_norm": 1.4492801427841187, + "learning_rate": 1.132e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0518, + "grad_norm": 1.1326556205749512, + "learning_rate": 1.131e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0059, + "grad_norm": 1.0942848920822144, + "learning_rate": 1.13e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0486, + "grad_norm": 1.2563117742538452, + "learning_rate": 1.129e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.0994, + "grad_norm": 2.3433609008789062, + "learning_rate": 1.128e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1001, + "grad_norm": 2.7536284923553467, + "learning_rate": 1.127e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.0585, + "grad_norm": 0.9778537154197693, + "learning_rate": 1.126e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0062, + "grad_norm": 1.1226321458816528, + "learning_rate": 1.125e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0471, + "grad_norm": 1.1883548498153687, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0784, + "grad_norm": 1.976486086845398, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0701, + "grad_norm": 1.0843766927719116, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.067, + "grad_norm": 1.3081246614456177, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0062, + "grad_norm": 1.1432628631591797, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.0415, + "grad_norm": 0.9637823104858398, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0059, + "grad_norm": 1.120526909828186, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.005, + "grad_norm": 0.9103840589523315, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0696, + "grad_norm": 1.4037501811981201, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0466, + "grad_norm": 0.9911297559738159, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.0383, + "grad_norm": 0.9758827090263367, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0675, + "grad_norm": 1.3758506774902344, + "learning_rate": 1.113e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0032, + "grad_norm": 0.5923029780387878, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0032, + "grad_norm": 0.5734418630599976, + "learning_rate": 1.111e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0533, + "grad_norm": 1.0125759840011597, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0738, + "grad_norm": 1.2687044143676758, + "learning_rate": 1.109e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.372, + "grad_norm": 5.941206455230713, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.108, + "grad_norm": 2.1613714694976807, + "learning_rate": 1.107e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.0024, + "grad_norm": 0.39348432421684265, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.0639, + "grad_norm": 1.184023141860962, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0824, + "grad_norm": 1.9686490297317505, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0026, + "grad_norm": 0.44682711362838745, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0028, + "grad_norm": 0.49993517994880676, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0026, + "grad_norm": 0.4428325891494751, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0709, + "grad_norm": 1.2466169595718384, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0735, + "grad_norm": 1.3401033878326416, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0023, + "grad_norm": 0.3811323642730713, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0706, + "grad_norm": 1.4406594038009644, + "learning_rate": 1.097e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.054, + "grad_norm": 1.363612413406372, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0505, + "grad_norm": 1.161858320236206, + "learning_rate": 1.095e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.0022, + "grad_norm": 0.3702404797077179, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0023, + "grad_norm": 0.39905861020088196, + "learning_rate": 1.093e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0654, + "grad_norm": 1.083019733428955, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0613, + "grad_norm": 1.1142648458480835, + "learning_rate": 1.091e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0526, + "grad_norm": 1.24055016040802, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0687, + "grad_norm": 1.400773525238037, + "learning_rate": 1.089e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0462, + "grad_norm": 1.1053345203399658, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0574, + "grad_norm": 1.0202289819717407, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1215, + "grad_norm": 2.0495526790618896, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0603, + "grad_norm": 0.9297711253166199, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0073, + "grad_norm": 1.4618480205535889, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0491, + "grad_norm": 1.1468454599380493, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.07, + "grad_norm": 1.5984728336334229, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0097, + "grad_norm": 1.7861182689666748, + "learning_rate": 1.081e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0098, + "grad_norm": 1.7681940793991089, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0086, + "grad_norm": 1.6711666584014893, + "learning_rate": 1.079e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0431, + "grad_norm": 1.0142930746078491, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0417, + "grad_norm": 0.9444635510444641, + "learning_rate": 1.077e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0054, + "grad_norm": 1.0890287160873413, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0045, + "grad_norm": 0.9186440706253052, + "learning_rate": 1.075e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265022158622742, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0426, + "grad_norm": 1.0279744863510132, + "learning_rate": 1.073e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0693, + "grad_norm": 1.372605323791504, + "learning_rate": 1.072e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0017, + "grad_norm": 0.21290767192840576, + "learning_rate": 1.071e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0015, + "grad_norm": 0.17253448069095612, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.0526, + "grad_norm": 1.160703182220459, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0727, + "grad_norm": 1.2380679845809937, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1214, + "grad_norm": 2.0913727283477783, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0506, + "grad_norm": 1.0945791006088257, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.075, + "grad_norm": 1.382978916168213, + "learning_rate": 1.065e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0015, + "grad_norm": 0.172458216547966, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0742, + "grad_norm": 1.5439574718475342, + "learning_rate": 1.063e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0875, + "grad_norm": 1.514559030532837, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1175, + "grad_norm": 2.566283941268921, + "learning_rate": 1.061e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.22718015313148499, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0662, + "grad_norm": 1.2446449995040894, + "learning_rate": 1.059e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0023, + "grad_norm": 0.32198604941368103, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1204, + "grad_norm": 3.195101261138916, + "learning_rate": 1.057e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0647, + "grad_norm": 1.3185839653015137, + "learning_rate": 1.056e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0025, + "grad_norm": 0.3570478856563568, + "learning_rate": 1.055e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0692, + "grad_norm": 1.1017460823059082, + "learning_rate": 1.054e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0583, + "grad_norm": 1.167201042175293, + "learning_rate": 1.053e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1038, + "grad_norm": 2.155097723007202, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0038, + "grad_norm": 0.646456778049469, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0751, + "grad_norm": 1.3510818481445312, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1132, + "grad_norm": 2.1775286197662354, + "learning_rate": 1.049e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1073, + "grad_norm": 2.2072458267211914, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0048, + "grad_norm": 0.8271514177322388, + "learning_rate": 1.047e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0679, + "grad_norm": 1.0402039289474487, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0045, + "grad_norm": 0.7622825503349304, + "learning_rate": 1.045e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0538, + "grad_norm": 1.2865958213806152, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0656, + "grad_norm": 1.024865746498108, + "learning_rate": 1.043e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0039, + "grad_norm": 0.6565131545066833, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0043, + "grad_norm": 0.7380317449569702, + "learning_rate": 1.041e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0035, + "grad_norm": 0.570799708366394, + "learning_rate": 1.04e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.062, + "grad_norm": 1.1511563062667847, + "learning_rate": 1.039e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0695, + "grad_norm": 1.2906415462493896, + "learning_rate": 1.038e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0468, + "grad_norm": 1.2258033752441406, + "learning_rate": 1.037e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0024, + "grad_norm": 0.3688075542449951, + "learning_rate": 1.036e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0023, + "grad_norm": 0.3373582065105438, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.0709, + "grad_norm": 2.084989309310913, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.002, + "grad_norm": 0.27264249324798584, + "learning_rate": 1.033e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0018, + "grad_norm": 0.24489571154117584, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0799, + "grad_norm": 1.8190633058547974, + "learning_rate": 1.031e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0836, + "grad_norm": 1.4041454792022705, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1136, + "grad_norm": 2.274580240249634, + "learning_rate": 1.029e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0424, + "grad_norm": 1.3687119483947754, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0015, + "grad_norm": 0.16964252293109894, + "learning_rate": 1.027e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.0698, + "grad_norm": 1.1283705234527588, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0018, + "grad_norm": 0.22557133436203003, + "learning_rate": 1.025e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.21104346215724945, + "learning_rate": 1.024e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.0018, + "grad_norm": 0.24475614726543427, + "learning_rate": 1.023e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.0563, + "grad_norm": 2.955718755722046, + "learning_rate": 1.022e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0017, + "grad_norm": 0.24137888848781586, + "learning_rate": 1.021e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0017, + "grad_norm": 0.22060562670230865, + "learning_rate": 1.02e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0723, + "grad_norm": 1.5680960416793823, + "learning_rate": 1.019e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0016, + "grad_norm": 0.2214270681142807, + "learning_rate": 1.018e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.216565802693367, + "learning_rate": 1.017e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0684, + "grad_norm": 1.214136004447937, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1141, + "grad_norm": 2.0787954330444336, + "learning_rate": 1.015e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0015, + "grad_norm": 0.1908382773399353, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.0684, + "grad_norm": 0.9953256845474243, + "learning_rate": 1.013e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1151, + "grad_norm": 2.989778518676758, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0015, + "grad_norm": 0.1622181534767151, + "learning_rate": 1.011e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0015, + "grad_norm": 0.19451792538166046, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0015, + "grad_norm": 0.17583484947681427, + "learning_rate": 1.009e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.0971, + "grad_norm": 2.013803482055664, + "learning_rate": 1.008e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0015, + "grad_norm": 0.17960964143276215, + "learning_rate": 1.007e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0015, + "grad_norm": 0.18522843718528748, + "learning_rate": 1.006e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.071, + "grad_norm": 1.612250804901123, + "learning_rate": 1.005e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0789, + "grad_norm": 1.4309505224227905, + "learning_rate": 1.004e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0749, + "grad_norm": 1.3195449113845825, + "learning_rate": 1.003e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0685, + "grad_norm": 2.325835943222046, + "learning_rate": 1.002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0454, + "grad_norm": 1.1207916736602783, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.0018, + "grad_norm": 0.25914737582206726, + "learning_rate": 1e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0022, + "grad_norm": 0.35625582933425903, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.002, + "grad_norm": 0.3242781162261963, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3145410120487213, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0021, + "grad_norm": 0.33488088846206665, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0019, + "grad_norm": 0.2918454706668854, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0728, + "grad_norm": 1.2409576177597046, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.072, + "grad_norm": 1.2893600463867188, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.043, + "grad_norm": 1.1790004968643188, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0602, + "grad_norm": 1.1076241731643677, + "learning_rate": 9.91e-06, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0017, + "grad_norm": 0.2319565713405609, + "learning_rate": 9.9e-06, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0573, + "grad_norm": 2.263990879058838, + "learning_rate": 9.89e-06, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0018, + "grad_norm": 0.27414289116859436, + "learning_rate": 9.88e-06, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.068, + "grad_norm": 1.3204398155212402, + "learning_rate": 9.87e-06, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0021, + "grad_norm": 0.33790865540504456, + "learning_rate": 9.86e-06, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.002, + "grad_norm": 0.3250488340854645, + "learning_rate": 9.85e-06, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0614, + "grad_norm": 1.4563555717468262, + "learning_rate": 9.84e-06, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0499, + "grad_norm": 3.906182289123535, + "learning_rate": 9.83e-06, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1039, + "grad_norm": 2.9131107330322266, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1067, + "grad_norm": 3.119446039199829, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.0023, + "grad_norm": 0.3656690716743469, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0647, + "grad_norm": 1.234238862991333, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0612, + "grad_norm": 1.0838911533355713, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0668, + "grad_norm": 1.8563507795333862, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0027, + "grad_norm": 0.447256475687027, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0029, + "grad_norm": 0.4668635427951813, + "learning_rate": 9.75e-06, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0027, + "grad_norm": 0.45568251609802246, + "learning_rate": 9.74e-06, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0029, + "grad_norm": 0.5207828283309937, + "learning_rate": 9.73e-06, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0023, + "grad_norm": 0.3548046946525574, + "learning_rate": 9.72e-06, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.0022, + "grad_norm": 0.3339339792728424, + "learning_rate": 9.71e-06, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0449, + "grad_norm": 1.344630479812622, + "learning_rate": 9.7e-06, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0627, + "grad_norm": 1.3697110414505005, + "learning_rate": 9.69e-06, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0631, + "grad_norm": 1.4324746131896973, + "learning_rate": 9.68e-06, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0598, + "grad_norm": 1.1418583393096924, + "learning_rate": 9.67e-06, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0703, + "grad_norm": 1.3187053203582764, + "learning_rate": 9.66e-06, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0674, + "grad_norm": 1.5415701866149902, + "learning_rate": 9.65e-06, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0022, + "grad_norm": 0.5410366654396057, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0472, + "grad_norm": 1.4691059589385986, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0714, + "grad_norm": 1.8328925371170044, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0502, + "grad_norm": 1.4959746599197388, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770292103290558, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.0638, + "grad_norm": 1.2776446342468262, + "learning_rate": 9.59e-06, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0437, + "grad_norm": 1.0079017877578735, + "learning_rate": 9.58e-06, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0628, + "grad_norm": 1.1776297092437744, + "learning_rate": 9.57e-06, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0444, + "grad_norm": 1.2560832500457764, + "learning_rate": 9.56e-06, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0658, + "grad_norm": 1.9305787086486816, + "learning_rate": 9.55e-06, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0673, + "grad_norm": 1.5484907627105713, + "learning_rate": 9.54e-06, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0603, + "grad_norm": 1.2816107273101807, + "learning_rate": 9.53e-06, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0071, + "grad_norm": 1.2031859159469604, + "learning_rate": 9.52e-06, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0446, + "grad_norm": 1.0432018041610718, + "learning_rate": 9.51e-06, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0082, + "grad_norm": 1.3467326164245605, + "learning_rate": 9.5e-06, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.044, + "grad_norm": 1.1683317422866821, + "learning_rate": 9.49e-06, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.007, + "grad_norm": 1.1747612953186035, + "learning_rate": 9.48e-06, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0623, + "grad_norm": 1.1376299858093262, + "learning_rate": 9.47e-06, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0711, + "grad_norm": 1.2417066097259521, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0052, + "grad_norm": 0.9077128171920776, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0053, + "grad_norm": 0.951680600643158, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0467, + "grad_norm": 1.1328734159469604, + "learning_rate": 9.43e-06, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0036, + "grad_norm": 0.6388375163078308, + "learning_rate": 9.42e-06, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0713, + "grad_norm": 1.098759651184082, + "learning_rate": 9.41e-06, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0024, + "grad_norm": 0.3749485909938812, + "learning_rate": 9.4e-06, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.078, + "grad_norm": 1.4193601608276367, + "learning_rate": 9.39e-06, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0021, + "grad_norm": 0.29766610264778137, + "learning_rate": 9.38e-06, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0019, + "grad_norm": 0.2773911952972412, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0016, + "grad_norm": 0.19664674997329712, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0994, + "grad_norm": 2.1268746852874756, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.0476, + "grad_norm": 1.1297088861465454, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0518, + "grad_norm": 1.1052606105804443, + "learning_rate": 9.33e-06, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0483, + "grad_norm": 1.1215248107910156, + "learning_rate": 9.32e-06, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0595, + "grad_norm": 1.192276120185852, + "learning_rate": 9.31e-06, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1127, + "grad_norm": 2.282710552215576, + "learning_rate": 9.3e-06, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0014, + "grad_norm": 0.18352188169956207, + "learning_rate": 9.29e-06, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0668, + "grad_norm": 1.2716619968414307, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1147, + "grad_norm": 2.7008156776428223, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.1018, + "grad_norm": 2.031930446624756, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.002, + "grad_norm": 0.2863346338272095, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0471, + "grad_norm": 1.2682809829711914, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.002, + "grad_norm": 0.30941078066825867, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0024, + "grad_norm": 0.3932475745677948, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0632, + "grad_norm": 1.0679800510406494, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0663, + "grad_norm": 1.3005118370056152, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0423, + "grad_norm": 1.1240161657333374, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0029, + "grad_norm": 0.4581877887248993, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0029, + "grad_norm": 0.47186893224716187, + "learning_rate": 9.17e-06, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0032, + "grad_norm": 0.5238748788833618, + "learning_rate": 9.16e-06, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0028, + "grad_norm": 0.4411686062812805, + "learning_rate": 9.15e-06, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0026, + "grad_norm": 0.40239110589027405, + "learning_rate": 9.14e-06, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0023, + "grad_norm": 0.3315543234348297, + "learning_rate": 9.13e-06, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0021, + "grad_norm": 0.2885858416557312, + "learning_rate": 9.12e-06, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.073, + "grad_norm": 1.8177210092544556, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.0966, + "grad_norm": 1.7291756868362427, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0016, + "grad_norm": 0.19609428942203522, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0496, + "grad_norm": 1.1353715658187866, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0015, + "grad_norm": 0.17373698949813843, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.0441, + "grad_norm": 1.0672266483306885, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0014, + "grad_norm": 0.154168039560318, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0737, + "grad_norm": 1.3493475914001465, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.14875750243663788, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0012, + "grad_norm": 0.13037247955799103, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0012, + "grad_norm": 0.12503254413604736, + "learning_rate": 9.01e-06, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12820948660373688, + "learning_rate": 9e-06, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.0885, + "grad_norm": 1.8362265825271606, + "learning_rate": 8.99e-06, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.12838858366012573, + "learning_rate": 8.98e-06, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0495, + "grad_norm": 1.446435809135437, + "learning_rate": 8.97e-06, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.07, + "grad_norm": 1.1417546272277832, + "learning_rate": 8.96e-06, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0679, + "grad_norm": 1.1534578800201416, + "learning_rate": 8.95e-06, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.0556, + "grad_norm": 1.263162612915039, + "learning_rate": 8.94e-06, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0687, + "grad_norm": 1.441730260848999, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0561, + "grad_norm": 0.989497721195221, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0508, + "grad_norm": 1.1718560457229614, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0436, + "grad_norm": 1.1105691194534302, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0574, + "grad_norm": 1.159988522529602, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0028, + "grad_norm": 0.5130383968353271, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0703, + "grad_norm": 1.8314932584762573, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0389, + "grad_norm": 0.7763837575912476, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0648, + "grad_norm": 1.4212884902954102, + "learning_rate": 8.85e-06, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0467, + "grad_norm": 1.0347092151641846, + "learning_rate": 8.84e-06, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0574, + "grad_norm": 0.9852561950683594, + "learning_rate": 8.83e-06, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0443, + "grad_norm": 1.2871586084365845, + "learning_rate": 8.82e-06, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0497, + "grad_norm": 1.0900676250457764, + "learning_rate": 8.81e-06, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0094, + "grad_norm": 1.5167303085327148, + "learning_rate": 8.8e-06, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0091, + "grad_norm": 1.4984208345413208, + "learning_rate": 8.79e-06, + "num_tokens": 381077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0087, + "grad_norm": 1.4189144372940063, + "learning_rate": 8.78e-06, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0711, + "grad_norm": 1.5254539251327515, + "learning_rate": 8.77e-06, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0559, + "grad_norm": 0.9745803475379944, + "learning_rate": 8.76e-06, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0487, + "grad_norm": 0.9314166307449341, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.0985, + "grad_norm": 1.935889482498169, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.0884, + "grad_norm": 2.4487457275390625, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0417, + "grad_norm": 1.0779677629470825, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0071, + "grad_norm": 1.1962640285491943, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0412, + "grad_norm": 1.0417979955673218, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.0064, + "grad_norm": 1.0799331665039062, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0061, + "grad_norm": 1.0343092679977417, + "learning_rate": 8.68e-06, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0516, + "grad_norm": 1.2088981866836548, + "learning_rate": 8.67e-06, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0645, + "grad_norm": 1.4574052095413208, + "learning_rate": 8.66e-06, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0608, + "grad_norm": 1.5976455211639404, + "learning_rate": 8.65e-06, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0034, + "grad_norm": 0.562424898147583, + "learning_rate": 8.64e-06, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0031, + "grad_norm": 0.5184334516525269, + "learning_rate": 8.63e-06, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0538, + "grad_norm": 1.175452709197998, + "learning_rate": 8.62e-06, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0457, + "grad_norm": 1.0699386596679688, + "learning_rate": 8.61e-06, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0409, + "grad_norm": 1.2275623083114624, + "learning_rate": 8.6e-06, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0024, + "grad_norm": 0.36210763454437256, + "learning_rate": 8.59e-06, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0506, + "grad_norm": 1.1862293481826782, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0417, + "grad_norm": 1.0955649614334106, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0021, + "grad_norm": 0.3166447579860687, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0021, + "grad_norm": 0.3213079571723938, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.002, + "grad_norm": 0.29460856318473816, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0018, + "grad_norm": 0.2646322250366211, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.0962, + "grad_norm": 1.9064080715179443, + "learning_rate": 8.52e-06, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0018, + "grad_norm": 0.26078224182128906, + "learning_rate": 8.51e-06, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.22155798971652985, + "learning_rate": 8.5e-06, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0462, + "grad_norm": 1.282672643661499, + "learning_rate": 8.49e-06, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0615, + "grad_norm": 1.0272878408432007, + "learning_rate": 8.48e-06, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0391, + "grad_norm": 1.081066370010376, + "learning_rate": 8.47e-06, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0016, + "grad_norm": 0.2022254467010498, + "learning_rate": 8.46e-06, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0647, + "grad_norm": 1.203537106513977, + "learning_rate": 8.45e-06, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0411, + "grad_norm": 1.3823119401931763, + "learning_rate": 8.44e-06, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0017, + "grad_norm": 0.23678964376449585, + "learning_rate": 8.43e-06, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0498, + "grad_norm": 1.1035040616989136, + "learning_rate": 8.42e-06, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0019, + "grad_norm": 0.2826336622238159, + "learning_rate": 8.41e-06, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0018, + "grad_norm": 0.26219162344932556, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0369, + "grad_norm": 0.8924168944358826, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.2968710660934448, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0655, + "grad_norm": 1.4359571933746338, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0793, + "grad_norm": 1.4873827695846558, + "learning_rate": 8.36e-06, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0022, + "grad_norm": 0.3399635851383209, + "learning_rate": 8.35e-06, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0388, + "grad_norm": 1.2504096031188965, + "learning_rate": 8.34e-06, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0022, + "grad_norm": 0.34148266911506653, + "learning_rate": 8.33e-06, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0022, + "grad_norm": 0.33662110567092896, + "learning_rate": 8.32e-06, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.0022, + "grad_norm": 0.324468731880188, + "learning_rate": 8.31e-06, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.1031, + "grad_norm": 1.776872992515564, + "learning_rate": 8.3e-06, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0019, + "grad_norm": 0.27522948384284973, + "learning_rate": 8.29e-06, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0625, + "grad_norm": 1.0583921670913696, + "learning_rate": 8.28e-06, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.002, + "grad_norm": 0.2976676821708679, + "learning_rate": 8.27e-06, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0428, + "grad_norm": 1.0262646675109863, + "learning_rate": 8.26e-06, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.0569, + "grad_norm": 1.088004469871521, + "learning_rate": 8.25e-06, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0617, + "grad_norm": 1.422031044960022, + "learning_rate": 8.24e-06, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0705, + "grad_norm": 1.1122493743896484, + "learning_rate": 8.23e-06, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0023, + "grad_norm": 0.3706248998641968, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0548, + "grad_norm": 1.159569501876831, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0027, + "grad_norm": 0.44550517201423645, + "learning_rate": 8.2e-06, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0505, + "grad_norm": 1.0908255577087402, + "learning_rate": 8.19e-06, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0429, + "grad_norm": 0.9888002276420593, + "learning_rate": 8.18e-06, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.039, + "grad_norm": 1.1269707679748535, + "learning_rate": 8.17e-06, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0547, + "grad_norm": 2.2459864616394043, + "learning_rate": 8.16e-06, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0648, + "grad_norm": 1.141405463218689, + "learning_rate": 8.15e-06, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0036, + "grad_norm": 0.6154343485832214, + "learning_rate": 8.14e-06, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0037, + "grad_norm": 0.607581377029419, + "learning_rate": 8.13e-06, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.041, + "grad_norm": 1.0139696598052979, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0548, + "grad_norm": 1.2063956260681152, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0546, + "grad_norm": 1.0185149908065796, + "learning_rate": 8.1e-06, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0846, + "grad_norm": 1.5638638734817505, + "learning_rate": 8.09e-06, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0397, + "grad_norm": 0.9592515826225281, + "learning_rate": 8.08e-06, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0732, + "grad_norm": 2.417308807373047, + "learning_rate": 8.07e-06, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0397, + "grad_norm": 1.0397586822509766, + "learning_rate": 8.06e-06, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0539, + "grad_norm": 1.0043741464614868, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0064, + "grad_norm": 1.0331615209579468, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.3439, + "grad_norm": 7.151169776916504, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.3186, + "grad_norm": 6.194533348083496, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0373780727386475, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0693, + "grad_norm": 1.3804030418395996, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0063, + "grad_norm": 1.0356889963150024, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0063, + "grad_norm": 1.025659203529358, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1028, + "grad_norm": 2.4993162155151367, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0765, + "grad_norm": 1.528414011001587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0039, + "grad_norm": 0.6606444120407104, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.1021, + "grad_norm": 1.9298466444015503, + "learning_rate": 7.94e-06, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0658, + "grad_norm": 1.2403901815414429, + "learning_rate": 7.93e-06, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0901, + "grad_norm": 2.676560878753662, + "learning_rate": 7.92e-06, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0025, + "grad_norm": 0.3969874083995819, + "learning_rate": 7.91e-06, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0022, + "grad_norm": 0.3410389721393585, + "learning_rate": 7.9e-06, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.0467, + "grad_norm": 1.2688374519348145, + "learning_rate": 7.89e-06, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0906, + "grad_norm": 1.5839786529541016, + "learning_rate": 7.88e-06, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0808, + "grad_norm": 1.8329588174819946, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0678, + "grad_norm": 1.438069462776184, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0675, + "grad_norm": 1.4430946111679077, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0019, + "grad_norm": 0.29633986949920654, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0494, + "grad_norm": 1.1387202739715576, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0021, + "grad_norm": 0.32885608077049255, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.0862, + "grad_norm": 2.407383680343628, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0614, + "grad_norm": 1.1128315925598145, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0022, + "grad_norm": 0.3651196360588074, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0648, + "grad_norm": 1.3287708759307861, + "learning_rate": 7.78e-06, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.0023, + "grad_norm": 0.3838794231414795, + "learning_rate": 7.77e-06, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0684, + "grad_norm": 1.4677760601043701, + "learning_rate": 7.76e-06, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0024, + "grad_norm": 0.42079463601112366, + "learning_rate": 7.75e-06, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0024, + "grad_norm": 0.42147955298423767, + "learning_rate": 7.74e-06, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0441, + "grad_norm": 1.1677274703979492, + "learning_rate": 7.73e-06, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0492, + "grad_norm": 1.4035431146621704, + "learning_rate": 7.72e-06, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0671, + "grad_norm": 1.9446959495544434, + "learning_rate": 7.71e-06, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0025, + "grad_norm": 0.4543871581554413, + "learning_rate": 7.7e-06, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.042, + "grad_norm": 1.1771857738494873, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0679, + "grad_norm": 1.3713475465774536, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0026, + "grad_norm": 0.47350987792015076, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0633, + "grad_norm": 1.3524508476257324, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0637, + "grad_norm": 1.2763797044754028, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0902, + "grad_norm": 1.6739592552185059, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0031, + "grad_norm": 0.5534782409667969, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.0501, + "grad_norm": 1.3401867151260376, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.046, + "grad_norm": 1.1883294582366943, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0466, + "grad_norm": 1.101483941078186, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.071, + "grad_norm": 1.3334777355194092, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0558, + "grad_norm": 1.267762541770935, + "learning_rate": 7.58e-06, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0658, + "grad_norm": 1.4283661842346191, + "learning_rate": 7.57e-06, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0411, + "grad_norm": 0.9805395007133484, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0766, + "grad_norm": 1.4888850450515747, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0055, + "grad_norm": 0.9557706713676453, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0054, + "grad_norm": 0.9585487842559814, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0538, + "grad_norm": 1.1800369024276733, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0051, + "grad_norm": 0.8553330898284912, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0629, + "grad_norm": 1.230909824371338, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.068, + "grad_norm": 1.453507900238037, + "learning_rate": 7.49e-06, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0427, + "grad_norm": 0.9869980812072754, + "learning_rate": 7.48e-06, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1017, + "grad_norm": 2.1453680992126465, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0042, + "grad_norm": 0.7140144109725952, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.0616, + "grad_norm": 1.021086573600769, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0434, + "grad_norm": 1.1894596815109253, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0862, + "grad_norm": 2.159723997116089, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.0429, + "grad_norm": 1.066892147064209, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0572, + "grad_norm": 1.0095235109329224, + "learning_rate": 7.41e-06, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.054, + "grad_norm": 1.2086626291275024, + "learning_rate": 7.4e-06, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0046, + "grad_norm": 0.7741432189941406, + "learning_rate": 7.39e-06, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0047, + "grad_norm": 0.7828612923622131, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0045, + "grad_norm": 0.7598645687103271, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0046, + "grad_norm": 0.7734522819519043, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.057, + "grad_norm": 1.0973255634307861, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.065, + "grad_norm": 1.709967017173767, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0931, + "grad_norm": 2.1337525844573975, + "learning_rate": 7.33e-06, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0028, + "grad_norm": 0.4441553056240082, + "learning_rate": 7.32e-06, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0805, + "grad_norm": 3.2075629234313965, + "learning_rate": 7.31e-06, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0026, + "grad_norm": 0.4167421758174896, + "learning_rate": 7.3e-06, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0023, + "grad_norm": 0.35469523072242737, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0021, + "grad_norm": 0.31768423318862915, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0441, + "grad_norm": 0.9787921905517578, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0019, + "grad_norm": 0.2729261517524719, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0016, + "grad_norm": 0.21043084561824799, + "learning_rate": 7.25e-06, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0015, + "grad_norm": 0.1971331685781479, + "learning_rate": 7.24e-06, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0802, + "grad_norm": 1.84896719455719, + "learning_rate": 7.23e-06, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.0687, + "grad_norm": 1.369922399520874, + "learning_rate": 7.22e-06, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0014, + "grad_norm": 0.16199085116386414, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0013, + "grad_norm": 0.14561891555786133, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0762, + "grad_norm": 2.150111436843872, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.0011, + "grad_norm": 0.12219979614019394, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0409, + "grad_norm": 1.0275540351867676, + "learning_rate": 7.17e-06, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0622, + "grad_norm": 1.3782963752746582, + "learning_rate": 7.16e-06, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0942, + "grad_norm": 2.0990819931030273, + "learning_rate": 7.15e-06, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0556, + "grad_norm": 1.1607019901275635, + "learning_rate": 7.14e-06, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0012, + "grad_norm": 0.14383459091186523, + "learning_rate": 7.13e-06, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0443, + "grad_norm": 1.0032017230987549, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0014, + "grad_norm": 0.18446141481399536, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0014, + "grad_norm": 0.19693079590797424, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.0486, + "grad_norm": 1.2597516775131226, + "learning_rate": 7.09e-06, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0014, + "grad_norm": 0.1964249163866043, + "learning_rate": 7.08e-06, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0015, + "grad_norm": 0.21462222933769226, + "learning_rate": 7.07e-06, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0508, + "grad_norm": 1.3977996110916138, + "learning_rate": 7.06e-06, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0828, + "grad_norm": 1.5659841299057007, + "learning_rate": 7.05e-06, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0603, + "grad_norm": 1.602921724319458, + "learning_rate": 7.04e-06, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0744, + "grad_norm": 2.2317163944244385, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0561, + "grad_norm": 2.125541925430298, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.3173121213912964, + "learning_rate": 7.01e-06, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0459, + "grad_norm": 1.2071703672409058, + "learning_rate": 7e-06, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0432, + "grad_norm": 1.2934582233428955, + "learning_rate": 6.99e-06, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0489, + "grad_norm": 1.1334161758422852, + "learning_rate": 6.98e-06, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0579, + "grad_norm": 0.9369598627090454, + "learning_rate": 6.97e-06, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0033, + "grad_norm": 0.5776845812797546, + "learning_rate": 6.96e-06, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0569, + "grad_norm": 1.3031799793243408, + "learning_rate": 6.95e-06, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0037, + "grad_norm": 0.6248667240142822, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0032, + "grad_norm": 0.5299662947654724, + "learning_rate": 6.93e-06, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0667, + "grad_norm": 1.8433657884597778, + "learning_rate": 6.92e-06, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0577, + "grad_norm": 1.1226876974105835, + "learning_rate": 6.91e-06, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.0567, + "grad_norm": 1.1603243350982666, + "learning_rate": 6.9e-06, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0032, + "grad_norm": 0.5435492992401123, + "learning_rate": 6.89e-06, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0606, + "grad_norm": 0.9929336905479431, + "learning_rate": 6.88e-06, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0036, + "grad_norm": 0.6169335842132568, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0649, + "grad_norm": 1.2230188846588135, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0613, + "grad_norm": 1.0680222511291504, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.0455, + "grad_norm": 1.529793620109558, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0036, + "grad_norm": 0.614677906036377, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.074, + "grad_norm": 2.1550259590148926, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0541, + "grad_norm": 0.9593685269355774, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.0036, + "grad_norm": 0.5768935084342957, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0465, + "grad_norm": 1.2158730030059814, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0438, + "grad_norm": 1.1586334705352783, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0444, + "grad_norm": 1.4859849214553833, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0403, + "grad_norm": 1.1270227432250977, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.004, + "grad_norm": 0.6430424451828003, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.0906, + "grad_norm": 1.5925347805023193, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0422, + "grad_norm": 0.9977685213088989, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0564, + "grad_norm": 1.1696628332138062, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0518, + "grad_norm": 0.9724094271659851, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0047, + "grad_norm": 0.7779951095581055, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0043, + "grad_norm": 0.7115391492843628, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.3534, + "grad_norm": 6.629246234893799, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0038, + "grad_norm": 0.6219172477722168, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0041, + "grad_norm": 0.6817074418067932, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0601, + "grad_norm": 1.2284682989120483, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0585, + "grad_norm": 1.3272614479064941, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0417, + "grad_norm": 0.929707944393158, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.0768, + "grad_norm": 1.2148957252502441, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.003, + "grad_norm": 0.4916832149028778, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0659, + "grad_norm": 1.1595323085784912, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0669, + "grad_norm": 1.3607900142669678, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0843, + "grad_norm": 2.730896472930908, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0587, + "grad_norm": 1.2983198165893555, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.0675, + "grad_norm": 1.475829839706421, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0034, + "grad_norm": 0.569835364818573, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0031, + "grad_norm": 0.5171738862991333, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0032, + "grad_norm": 0.5472842454910278, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0029, + "grad_norm": 0.4868464767932892, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0616, + "grad_norm": 1.1753767728805542, + "learning_rate": 6.51e-06, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.05, + "grad_norm": 1.306359052658081, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.0027, + "grad_norm": 0.4471572935581207, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0535, + "grad_norm": 1.1857725381851196, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0023, + "grad_norm": 0.39148810505867004, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0021, + "grad_norm": 0.3375743329524994, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0601, + "grad_norm": 3.349716901779175, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.077, + "grad_norm": 1.3602453470230103, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0482, + "grad_norm": 1.1098014116287231, + "learning_rate": 6.43e-06, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0019, + "grad_norm": 0.3053341507911682, + "learning_rate": 6.42e-06, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0019, + "grad_norm": 0.3125056326389313, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0019, + "grad_norm": 0.28826457262039185, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0652, + "grad_norm": 1.4113070964813232, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0467, + "grad_norm": 1.2754263877868652, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0017, + "grad_norm": 0.2621810734272003, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0658, + "grad_norm": 1.0557119846343994, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0567, + "grad_norm": 1.4838411808013916, + "learning_rate": 6.35e-06, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.26117855310440063, + "learning_rate": 6.34e-06, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.1064739227294922, + "learning_rate": 6.33e-06, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0435, + "grad_norm": 1.063262939453125, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.066, + "grad_norm": 1.1504032611846924, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0641, + "grad_norm": 1.203201174736023, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0585, + "grad_norm": 1.2477880716323853, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0025, + "grad_norm": 0.4655078947544098, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0602, + "grad_norm": 1.341115951538086, + "learning_rate": 6.27e-06, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0904, + "grad_norm": 2.366762399673462, + "learning_rate": 6.26e-06, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0033, + "grad_norm": 0.6076349020004272, + "learning_rate": 6.25e-06, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0907, + "grad_norm": 1.9339498281478882, + "learning_rate": 6.24e-06, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0864, + "grad_norm": 1.780813217163086, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0033, + "grad_norm": 0.6028679609298706, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0542, + "grad_norm": 1.0088207721710205, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0758, + "grad_norm": 1.5442019701004028, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0034, + "grad_norm": 0.6019788980484009, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.277, + "grad_norm": 5.171119689941406, + "learning_rate": 6.18e-06, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0036, + "grad_norm": 0.6451438665390015, + "learning_rate": 6.17e-06, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0037, + "grad_norm": 0.6643303036689758, + "learning_rate": 6.16e-06, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0034, + "grad_norm": 0.6205865740776062, + "learning_rate": 6.15e-06, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0029, + "grad_norm": 0.4953503906726837, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.0027, + "grad_norm": 0.46802619099617004, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0908, + "grad_norm": 1.535525918006897, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0417, + "grad_norm": 0.9248743653297424, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.002, + "grad_norm": 0.3165223300457001, + "learning_rate": 6.1e-06, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0542, + "grad_norm": 0.9654661417007446, + "learning_rate": 6.09e-06, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0692, + "grad_norm": 1.3097866773605347, + "learning_rate": 6.08e-06, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0701, + "grad_norm": 1.50612473487854, + "learning_rate": 6.07e-06, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0017, + "grad_norm": 0.2454281896352768, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0855, + "grad_norm": 1.9738035202026367, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0017, + "grad_norm": 0.2594867944717407, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0579, + "grad_norm": 1.1067945957183838, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0566, + "grad_norm": 1.0555428266525269, + "learning_rate": 6.02e-06, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0016, + "grad_norm": 0.24508465826511383, + "learning_rate": 6.01e-06, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0632, + "grad_norm": 1.3900046348571777, + "learning_rate": 6e-06, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0404, + "grad_norm": 0.9500136971473694, + "learning_rate": 5.99e-06, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0573, + "grad_norm": 1.2340861558914185, + "learning_rate": 5.98e-06, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.04, + "grad_norm": 1.035536527633667, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.064, + "grad_norm": 0.9856736660003662, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.2168488502502441, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.0819, + "grad_norm": 1.6233789920806885, + "learning_rate": 5.94e-06, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0644, + "grad_norm": 1.539711594581604, + "learning_rate": 5.93e-06, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0031, + "grad_norm": 0.5361098647117615, + "learning_rate": 5.92e-06, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0657, + "grad_norm": 1.5077885389328003, + "learning_rate": 5.91e-06, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0033, + "grad_norm": 0.5819950699806213, + "learning_rate": 5.9e-06, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0844, + "grad_norm": 1.6911466121673584, + "learning_rate": 5.89e-06, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 0.909106969833374, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0046, + "grad_norm": 0.8148921132087708, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0603, + "grad_norm": 1.50859797000885, + "learning_rate": 5.86e-06, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0041, + "grad_norm": 0.7295659780502319, + "learning_rate": 5.85e-06, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.0532, + "grad_norm": 1.1242952346801758, + "learning_rate": 5.84e-06, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0544, + "grad_norm": 0.9595649838447571, + "learning_rate": 5.83e-06, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0042, + "grad_norm": 0.7197695374488831, + "learning_rate": 5.82e-06, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0637, + "grad_norm": 1.327078938484192, + "learning_rate": 5.81e-06, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0515, + "grad_norm": 1.3836802244186401, + "learning_rate": 5.8e-06, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0471, + "grad_norm": 2.055051326751709, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0634, + "grad_norm": 1.3304088115692139, + "learning_rate": 5.78e-06, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0042, + "grad_norm": 0.7247684597969055, + "learning_rate": 5.77e-06, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0042, + "grad_norm": 0.7230411767959595, + "learning_rate": 5.76e-06, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0802, + "grad_norm": 1.942260980606079, + "learning_rate": 5.75e-06, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0408, + "grad_norm": 0.9843087792396545, + "learning_rate": 5.74e-06, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0037, + "grad_norm": 0.6149731278419495, + "learning_rate": 5.73e-06, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0035, + "grad_norm": 0.591227114200592, + "learning_rate": 5.72e-06, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0034, + "grad_norm": 0.5716548562049866, + "learning_rate": 5.71e-06, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0028, + "grad_norm": 0.4706770181655884, + "learning_rate": 5.7e-06, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0023, + "grad_norm": 0.37091749906539917, + "learning_rate": 5.69e-06, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0592, + "grad_norm": 1.1389172077178955, + "learning_rate": 5.68e-06, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0021, + "grad_norm": 0.33143892884254456, + "learning_rate": 5.67e-06, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.068, + "grad_norm": 2.0014731884002686, + "learning_rate": 5.66e-06, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0433, + "grad_norm": 1.1497068405151367, + "learning_rate": 5.65e-06, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0017, + "grad_norm": 0.2540724575519562, + "learning_rate": 5.64e-06, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0403, + "grad_norm": 1.0868761539459229, + "learning_rate": 5.63e-06, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0015, + "grad_norm": 0.19899524748325348, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0455, + "grad_norm": 1.617480754852295, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0014, + "grad_norm": 0.19665531814098358, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0648, + "grad_norm": 1.622554898262024, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0014, + "grad_norm": 0.18810254335403442, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0701, + "grad_norm": 1.4964152574539185, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0013, + "grad_norm": 0.15776444971561432, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0012, + "grad_norm": 0.1539117842912674, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0013, + "grad_norm": 0.1636369377374649, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0012, + "grad_norm": 0.15004193782806396, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0012, + "grad_norm": 0.15097948908805847, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0012, + "grad_norm": 0.14485493302345276, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.047, + "grad_norm": 1.3281570672988892, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0519, + "grad_norm": 2.394688844680786, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.0012, + "grad_norm": 0.1376945525407791, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.13309122622013092, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0439, + "grad_norm": 1.0667738914489746, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.0012, + "grad_norm": 0.14376237988471985, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.13507920503616333, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0749, + "grad_norm": 1.5052191019058228, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0012, + "grad_norm": 0.14203152060508728, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0445, + "grad_norm": 1.228667974472046, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.0656, + "grad_norm": 1.407843828201294, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0647, + "grad_norm": 1.6894930601119995, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.14642253518104553, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0452, + "grad_norm": 1.07169508934021, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0013, + "grad_norm": 0.1761048138141632, + "learning_rate": 5.36e-06, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0849, + "grad_norm": 2.0752289295196533, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0425, + "grad_norm": 1.113696575164795, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0846, + "grad_norm": 1.7338367700576782, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0014, + "grad_norm": 0.1934671550989151, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0443, + "grad_norm": 1.1740210056304932, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0016, + "grad_norm": 0.221791610121727, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0419, + "grad_norm": 1.0604463815689087, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0018, + "grad_norm": 0.2774617373943329, + "learning_rate": 5.28e-06, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0715, + "grad_norm": 1.4584964513778687, + "learning_rate": 5.27e-06, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0424, + "grad_norm": 1.1874643564224243, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0681, + "grad_norm": 1.1877933740615845, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0574, + "grad_norm": 1.2860503196716309, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0024, + "grad_norm": 0.38671889901161194, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0778, + "grad_norm": 1.683851718902588, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0624, + "grad_norm": 1.148560643196106, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0026, + "grad_norm": 0.422258198261261, + "learning_rate": 5.2e-06, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0029, + "grad_norm": 0.48346948623657227, + "learning_rate": 5.19e-06, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.003, + "grad_norm": 0.4990505874156952, + "learning_rate": 5.18e-06, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0444, + "grad_norm": 1.1750332117080688, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0631, + "grad_norm": 1.0927088260650635, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0029, + "grad_norm": 0.491895854473114, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0029, + "grad_norm": 0.48604080080986023, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0646, + "grad_norm": 1.8152271509170532, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0905, + "grad_norm": 2.1916065216064453, + "learning_rate": 5.12e-06, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0441, + "grad_norm": 0.9943680167198181, + "learning_rate": 5.11e-06, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0028, + "grad_norm": 0.4724738299846649, + "learning_rate": 5.1e-06, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0455, + "grad_norm": 1.327681303024292, + "learning_rate": 5.09e-06, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0401, + "grad_norm": 1.00179922580719, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.2741, + "grad_norm": 5.871794700622559, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0028, + "grad_norm": 0.48077592253685, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0706, + "grad_norm": 1.4320826530456543, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.0435, + "grad_norm": 1.2258262634277344, + "learning_rate": 5.04e-06, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0031, + "grad_norm": 0.5447593331336975, + "learning_rate": 5.03e-06, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0408, + "grad_norm": 1.0005323886871338, + "learning_rate": 5.02e-06, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0031, + "grad_norm": 0.52440345287323, + "learning_rate": 5.01e-06, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0931, + "grad_norm": 2.2890543937683105, + "learning_rate": 5e-06, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0028, + "grad_norm": 0.47974297404289246, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0028, + "grad_norm": 0.4712013900279999, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0734, + "grad_norm": 1.7330412864685059, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.0412, + "grad_norm": 1.2318421602249146, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0577, + "grad_norm": 1.1624799966812134, + "learning_rate": 4.95e-06, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0667, + "grad_norm": 1.3667885065078735, + "learning_rate": 4.94e-06, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0472, + "grad_norm": 1.0038102865219116, + "learning_rate": 4.93e-06, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0662, + "grad_norm": 1.370149850845337, + "learning_rate": 4.92e-06, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.003, + "grad_norm": 0.4965730309486389, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0397, + "grad_norm": 0.9282152056694031, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0576, + "grad_norm": 1.0276484489440918, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0656, + "grad_norm": 1.319326400756836, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0636, + "grad_norm": 1.2873133420944214, + "learning_rate": 4.87e-06, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.0032, + "grad_norm": 0.5650099515914917, + "learning_rate": 4.86e-06, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0404, + "grad_norm": 1.389515995979309, + "learning_rate": 4.85e-06, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0036, + "grad_norm": 0.6158953309059143, + "learning_rate": 4.84e-06, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0823, + "grad_norm": 2.242391347885132, + "learning_rate": 4.83e-06, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0653, + "grad_norm": 1.5677355527877808, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0781, + "grad_norm": 2.0974771976470947, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0611, + "grad_norm": 1.4084426164627075, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0044, + "grad_norm": 0.7955360412597656, + "learning_rate": 4.79e-06, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0352, + "grad_norm": 0.9566419124603271, + "learning_rate": 4.78e-06, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0564, + "grad_norm": 0.9539786577224731, + "learning_rate": 4.77e-06, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0459, + "grad_norm": 1.0773917436599731, + "learning_rate": 4.76e-06, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.075, + "grad_norm": 2.423198938369751, + "learning_rate": 4.75e-06, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0044, + "grad_norm": 0.7832935452461243, + "learning_rate": 4.74e-06, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0661, + "grad_norm": 1.3831069469451904, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.0043, + "grad_norm": 0.7653414011001587, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0039, + "grad_norm": 0.7014725208282471, + "learning_rate": 4.71e-06, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0042, + "grad_norm": 0.7603307962417603, + "learning_rate": 4.7e-06, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0622, + "grad_norm": 1.3033061027526855, + "learning_rate": 4.69e-06, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0774, + "grad_norm": 2.0244553089141846, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0035, + "grad_norm": 0.6342400908470154, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0031, + "grad_norm": 0.5407992601394653, + "learning_rate": 4.66e-06, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0611, + "grad_norm": 1.2235374450683594, + "learning_rate": 4.65e-06, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0623, + "grad_norm": 1.3751453161239624, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0027, + "grad_norm": 0.4813397526741028, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.0664, + "grad_norm": 1.2894669771194458, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.056, + "grad_norm": 1.4559017419815063, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0775, + "grad_norm": 2.593362808227539, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.3138, + "grad_norm": 5.148370742797852, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0756, + "grad_norm": 2.2736735343933105, + "learning_rate": 4.58e-06, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.065, + "grad_norm": 3.2683534622192383, + "learning_rate": 4.57e-06, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0025, + "grad_norm": 0.44800934195518494, + "learning_rate": 4.56e-06, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.2697, + "grad_norm": 5.550428867340088, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0566, + "grad_norm": 1.0541280508041382, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0021, + "grad_norm": 0.3617427945137024, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0473, + "grad_norm": 1.3375787734985352, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0021, + "grad_norm": 0.33384522795677185, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0379, + "grad_norm": 1.0544806718826294, + "learning_rate": 4.5e-06, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0023, + "grad_norm": 0.39406508207321167, + "learning_rate": 4.49e-06, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0752, + "grad_norm": 1.9515206813812256, + "learning_rate": 4.48e-06, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.0023, + "grad_norm": 0.3835340738296509, + "learning_rate": 4.47e-06, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.059, + "grad_norm": 1.1221628189086914, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0021, + "grad_norm": 0.3509887456893921, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.064, + "grad_norm": 1.205573320388794, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0718, + "grad_norm": 2.1418721675872803, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0414, + "grad_norm": 1.3037139177322388, + "learning_rate": 4.42e-06, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.0736, + "grad_norm": 2.1680147647857666, + "learning_rate": 4.41e-06, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0021, + "grad_norm": 0.347339004278183, + "learning_rate": 4.4e-06, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0736, + "grad_norm": 2.0864803791046143, + "learning_rate": 4.39e-06, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0025, + "grad_norm": 0.4395049810409546, + "learning_rate": 4.38e-06, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0023, + "grad_norm": 0.39004504680633545, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0022, + "grad_norm": 0.36095598340034485, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0582, + "grad_norm": 1.2327930927276611, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0461, + "grad_norm": 1.040818452835083, + "learning_rate": 4.34e-06, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.248, + "grad_norm": 5.55968713760376, + "learning_rate": 4.33e-06, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0021, + "grad_norm": 0.33996713161468506, + "learning_rate": 4.32e-06, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0885, + "grad_norm": 1.9103176593780518, + "learning_rate": 4.31e-06, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0021, + "grad_norm": 0.3596363663673401, + "learning_rate": 4.3e-06, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0024, + "grad_norm": 0.38911113142967224, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.0575, + "grad_norm": 1.1043959856033325, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.0398, + "grad_norm": 1.0082714557647705, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.07, + "grad_norm": 1.312532901763916, + "learning_rate": 4.26e-06, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.0019, + "grad_norm": 0.314879834651947, + "learning_rate": 4.25e-06, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.32559505105018616, + "learning_rate": 4.24e-06, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0021, + "grad_norm": 0.3332079350948334, + "learning_rate": 4.23e-06, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0585, + "grad_norm": 1.1406902074813843, + "learning_rate": 4.22e-06, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0018, + "grad_norm": 0.2799522876739502, + "learning_rate": 4.21e-06, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0525, + "grad_norm": 1.1263917684555054, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0019, + "grad_norm": 0.28769129514694214, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.002, + "grad_norm": 0.3043234348297119, + "learning_rate": 4.18e-06, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0018, + "grad_norm": 0.2788783311843872, + "learning_rate": 4.17e-06, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.002, + "grad_norm": 0.3088054358959198, + "learning_rate": 4.16e-06, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.0382, + "grad_norm": 1.0789445638656616, + "learning_rate": 4.15e-06, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.0435, + "grad_norm": 1.0291471481323242, + "learning_rate": 4.14e-06, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0754, + "grad_norm": 1.4396899938583374, + "learning_rate": 4.13e-06, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.05, + "grad_norm": 1.1235865354537964, + "learning_rate": 4.12e-06, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0018, + "grad_norm": 0.2745732069015503, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0017, + "grad_norm": 0.2619018256664276, + "learning_rate": 4.1e-06, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.063, + "grad_norm": 1.068122148513794, + "learning_rate": 4.09e-06, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.076, + "grad_norm": 1.5099190473556519, + "learning_rate": 4.08e-06, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.075, + "grad_norm": 1.370004415512085, + "learning_rate": 4.07e-06, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.06, + "grad_norm": 1.2732493877410889, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.045, + "grad_norm": 1.2496861219406128, + "learning_rate": 4.05e-06, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0471, + "grad_norm": 1.1135365962982178, + "learning_rate": 4.04e-06, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0668, + "grad_norm": 1.5768578052520752, + "learning_rate": 4.03e-06, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0024, + "grad_norm": 0.3887575566768646, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0023, + "grad_norm": 0.3817980885505676, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.2858, + "grad_norm": 5.93766975402832, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757269084453583, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0611, + "grad_norm": 1.3149932622909546, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.085, + "grad_norm": 1.8090168237686157, + "learning_rate": 3.97e-06, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0624, + "grad_norm": 1.2021411657333374, + "learning_rate": 3.96e-06, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0597, + "grad_norm": 1.1230809688568115, + "learning_rate": 3.95e-06, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0521, + "grad_norm": 1.225655198097229, + "learning_rate": 3.94e-06, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0028, + "grad_norm": 0.4546661674976349, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.2426, + "grad_norm": 4.83814001083374, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0032, + "grad_norm": 0.5268356800079346, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.003, + "grad_norm": 0.5073143839836121, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0571, + "grad_norm": 1.12201988697052, + "learning_rate": 3.89e-06, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0027, + "grad_norm": 0.441703200340271, + "learning_rate": 3.88e-06, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.06, + "grad_norm": 1.055845022201538, + "learning_rate": 3.87e-06, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0026, + "grad_norm": 0.4252733290195465, + "learning_rate": 3.86e-06, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0654, + "grad_norm": 1.2097599506378174, + "learning_rate": 3.85e-06, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0031, + "grad_norm": 0.5153416395187378, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0412, + "grad_norm": 1.2524850368499756, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0603, + "grad_norm": 1.216737985610962, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0027, + "grad_norm": 0.4374849498271942, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0027, + "grad_norm": 0.45386913418769836, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0772, + "grad_norm": 2.3643293380737305, + "learning_rate": 3.79e-06, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0585, + "grad_norm": 1.1927247047424316, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0024, + "grad_norm": 0.4038313329219818, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0024, + "grad_norm": 0.3948758542537689, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0022, + "grad_norm": 0.36720144748687744, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0024, + "grad_norm": 0.3845508098602295, + "learning_rate": 3.74e-06, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0021, + "grad_norm": 0.33976465463638306, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0656, + "grad_norm": 1.0829418897628784, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0816, + "grad_norm": 1.7684704065322876, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0021, + "grad_norm": 0.3379213809967041, + "learning_rate": 3.7e-06, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0017, + "grad_norm": 0.268597275018692, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0571, + "grad_norm": 1.7145894765853882, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0017, + "grad_norm": 0.262333482503891, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0453, + "grad_norm": 1.0645833015441895, + "learning_rate": 3.66e-06, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0596, + "grad_norm": 1.364123821258545, + "learning_rate": 3.65e-06, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0472, + "grad_norm": 0.9277791380882263, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.062, + "grad_norm": 1.2970867156982422, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0486, + "grad_norm": 1.1752419471740723, + "learning_rate": 3.62e-06, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.067, + "grad_norm": 1.646427869796753, + "learning_rate": 3.61e-06, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.0488, + "grad_norm": 1.3798638582229614, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0585, + "grad_norm": 1.2615973949432373, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0536, + "grad_norm": 1.4801198244094849, + "learning_rate": 3.58e-06, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0021, + "grad_norm": 0.3402940332889557, + "learning_rate": 3.57e-06, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0506, + "grad_norm": 0.878396213054657, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0022, + "grad_norm": 0.37959179282188416, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0023, + "grad_norm": 0.39978647232055664, + "learning_rate": 3.54e-06, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.0692, + "grad_norm": 1.6479856967926025, + "learning_rate": 3.53e-06, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0022, + "grad_norm": 0.37655898928642273, + "learning_rate": 3.52e-06, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0547, + "grad_norm": 1.4809867143630981, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.038, + "grad_norm": 1.2819538116455078, + "learning_rate": 3.5e-06, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0437, + "grad_norm": 1.2474430799484253, + "learning_rate": 3.49e-06, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0611, + "grad_norm": 1.1493180990219116, + "learning_rate": 3.48e-06, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.062, + "grad_norm": 1.4344936609268188, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0027, + "grad_norm": 0.501312255859375, + "learning_rate": 3.46e-06, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.003, + "grad_norm": 0.57524174451828, + "learning_rate": 3.45e-06, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.003, + "grad_norm": 0.546630322933197, + "learning_rate": 3.44e-06, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0028, + "grad_norm": 0.5239407420158386, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0395, + "grad_norm": 0.8654681444168091, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.0399, + "grad_norm": 0.9791849851608276, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0714, + "grad_norm": 1.4680542945861816, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0029, + "grad_norm": 0.5489619970321655, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0652, + "grad_norm": 1.445259690284729, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.0031, + "grad_norm": 0.554716944694519, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0655, + "grad_norm": 1.0966905355453491, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0494, + "grad_norm": 1.049824833869934, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0591, + "grad_norm": 1.8449171781539917, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.003, + "grad_norm": 0.5422641634941101, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.0805, + "grad_norm": 1.8794130086898804, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.0481, + "grad_norm": 0.9934747219085693, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0497, + "grad_norm": 1.2348871231079102, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0444, + "grad_norm": 1.1614453792572021, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0388, + "grad_norm": 1.22681725025177, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0032, + "grad_norm": 0.5757941603660583, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0034, + "grad_norm": 0.611791729927063, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0616, + "grad_norm": 1.136299967765808, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0433, + "grad_norm": 1.2018715143203735, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.042, + "grad_norm": 1.0409917831420898, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.044, + "grad_norm": 1.2323369979858398, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0034, + "grad_norm": 0.6153194904327393, + "learning_rate": 3.21e-06, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0034, + "grad_norm": 0.6106674671173096, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.0639, + "grad_norm": 1.089705467224121, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0692, + "grad_norm": 1.5026510953903198, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0637, + "grad_norm": 1.383870005607605, + "learning_rate": 3.17e-06, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0032, + "grad_norm": 0.568756639957428, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.0413, + "grad_norm": 1.2440272569656372, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.039, + "grad_norm": 1.180145025253296, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265860795974731, + "learning_rate": 3.13e-06, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0033, + "grad_norm": 0.5880522727966309, + "learning_rate": 3.12e-06, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0032, + "grad_norm": 0.5984041690826416, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0557, + "grad_norm": 1.0321638584136963, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0585, + "grad_norm": 1.1382465362548828, + "learning_rate": 3.09e-06, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0032, + "grad_norm": 0.5756648778915405, + "learning_rate": 3.08e-06, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.003, + "grad_norm": 0.5428857207298279, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0774, + "grad_norm": 1.805572271347046, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0569, + "grad_norm": 1.139460563659668, + "learning_rate": 3.05e-06, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.0426, + "grad_norm": 1.383743405342102, + "learning_rate": 3.04e-06, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0024, + "grad_norm": 0.4358248710632324, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0397, + "grad_norm": 1.0429037809371948, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0457, + "grad_norm": 1.3951339721679688, + "learning_rate": 3.01e-06, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0027, + "grad_norm": 0.47018593549728394, + "learning_rate": 3e-06, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0731, + "grad_norm": 1.9685642719268799, + "learning_rate": 2.99e-06, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.0026, + "grad_norm": 0.45238158106803894, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0024, + "grad_norm": 0.40610402822494507, + "learning_rate": 2.97e-06, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0525, + "grad_norm": 1.0180531740188599, + "learning_rate": 2.96e-06, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0436, + "grad_norm": 1.2175544500350952, + "learning_rate": 2.95e-06, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.0601, + "grad_norm": 1.2007901668548584, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0566, + "grad_norm": 1.2265726327896118, + "learning_rate": 2.93e-06, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0556, + "grad_norm": 1.1947659254074097, + "learning_rate": 2.92e-06, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0027, + "grad_norm": 0.464779794216156, + "learning_rate": 2.91e-06, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.0026, + "grad_norm": 0.4438534080982208, + "learning_rate": 2.9e-06, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0593, + "grad_norm": 1.0972975492477417, + "learning_rate": 2.89e-06, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0835, + "grad_norm": 1.884253978729248, + "learning_rate": 2.88e-06, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0633, + "grad_norm": 1.0084459781646729, + "learning_rate": 2.87e-06, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0558, + "grad_norm": 1.0302374362945557, + "learning_rate": 2.86e-06, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0542, + "grad_norm": 0.9511706829071045, + "learning_rate": 2.85e-06, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0506, + "grad_norm": 1.4875551462173462, + "learning_rate": 2.84e-06, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0596, + "grad_norm": 1.1406636238098145, + "learning_rate": 2.83e-06, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0843, + "grad_norm": 1.663854718208313, + "learning_rate": 2.82e-06, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.003, + "grad_norm": 0.5147997140884399, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0862, + "grad_norm": 1.6565779447555542, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0031, + "grad_norm": 0.5479184985160828, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0444, + "grad_norm": 1.354533076286316, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0031, + "grad_norm": 0.5383754968643188, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0405, + "grad_norm": 1.1847655773162842, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0686, + "grad_norm": 1.8093054294586182, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0599, + "grad_norm": 0.9621073603630066, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.6532343626022339, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.062, + "grad_norm": 1.1963555812835693, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0471, + "grad_norm": 1.2936190366744995, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0039, + "grad_norm": 0.6896610856056213, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.0035, + "grad_norm": 0.619045615196228, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.0037, + "grad_norm": 0.6495220065116882, + "learning_rate": 2.68e-06, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850738286972046, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0394, + "grad_norm": 1.1021217107772827, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.003, + "grad_norm": 0.5251200795173645, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5125622153282166, + "learning_rate": 2.64e-06, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0829, + "grad_norm": 1.8204774856567383, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.0624, + "grad_norm": 1.3469654321670532, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0587, + "grad_norm": 1.1263304948806763, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0791, + "grad_norm": 2.308769941329956, + "learning_rate": 2.6e-06, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0025, + "grad_norm": 0.42390695214271545, + "learning_rate": 2.59e-06, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0025, + "grad_norm": 0.4351828694343567, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0025, + "grad_norm": 0.45117858052253723, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.002, + "grad_norm": 0.3449709117412567, + "learning_rate": 2.56e-06, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0552, + "grad_norm": 1.02012038230896, + "learning_rate": 2.55e-06, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0021, + "grad_norm": 0.35598093271255493, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0706, + "grad_norm": 1.9882680177688599, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0585, + "grad_norm": 1.1153826713562012, + "learning_rate": 2.52e-06, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0606, + "grad_norm": 1.6919127702713013, + "learning_rate": 2.51e-06, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.0381, + "grad_norm": 0.9558757543563843, + "learning_rate": 2.5e-06, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0021, + "grad_norm": 0.3558536469936371, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0522, + "grad_norm": 1.5039445161819458, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0762, + "grad_norm": 1.8451253175735474, + "learning_rate": 2.47e-06, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0021, + "grad_norm": 0.3580801486968994, + "learning_rate": 2.46e-06, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0596, + "grad_norm": 1.0082149505615234, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0019, + "grad_norm": 0.31669387221336365, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0021, + "grad_norm": 0.3432970345020294, + "learning_rate": 2.43e-06, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0574, + "grad_norm": 1.3162227869033813, + "learning_rate": 2.42e-06, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0435, + "grad_norm": 1.0670703649520874, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0461, + "grad_norm": 1.2668665647506714, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0594, + "grad_norm": 1.4527745246887207, + "learning_rate": 2.39e-06, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.002, + "grad_norm": 0.3514978885650635, + "learning_rate": 2.38e-06, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0729, + "grad_norm": 2.0161454677581787, + "learning_rate": 2.37e-06, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.38664510846138, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0353, + "grad_norm": 0.9888522624969482, + "learning_rate": 2.35e-06, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0816, + "grad_norm": 1.6845252513885498, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.002, + "grad_norm": 0.34472399950027466, + "learning_rate": 2.33e-06, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0612, + "grad_norm": 1.5795350074768066, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.036, + "grad_norm": 1.0923341512680054, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0021, + "grad_norm": 0.36445900797843933, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0021, + "grad_norm": 0.36632096767425537, + "learning_rate": 2.29e-06, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0024, + "grad_norm": 0.4193936884403229, + "learning_rate": 2.28e-06, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0021, + "grad_norm": 0.36693835258483887, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0695, + "grad_norm": 1.6587837934494019, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0439, + "grad_norm": 1.2197368144989014, + "learning_rate": 2.25e-06, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.0737, + "grad_norm": 1.8300983905792236, + "learning_rate": 2.24e-06, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0443, + "grad_norm": 1.1544647216796875, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0023, + "grad_norm": 0.40331411361694336, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.0024, + "grad_norm": 0.4283469021320343, + "learning_rate": 2.21e-06, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0023, + "grad_norm": 0.38760119676589966, + "learning_rate": 2.2e-06, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0768, + "grad_norm": 2.4320685863494873, + "learning_rate": 2.19e-06, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.0022, + "grad_norm": 0.3753429353237152, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.0022, + "grad_norm": 0.37054023146629333, + "learning_rate": 2.17e-06, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.063, + "grad_norm": 1.1455004215240479, + "learning_rate": 2.16e-06, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.002, + "grad_norm": 0.3473651707172394, + "learning_rate": 2.15e-06, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0613, + "grad_norm": 1.3616305589675903, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0728, + "grad_norm": 1.4589122533798218, + "learning_rate": 2.13e-06, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0021, + "grad_norm": 0.3479214906692505, + "learning_rate": 2.12e-06, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0652, + "grad_norm": 1.3161977529525757, + "learning_rate": 2.11e-06, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0019, + "grad_norm": 0.30886292457580566, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0592, + "grad_norm": 1.1527003049850464, + "learning_rate": 2.09e-06, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0019, + "grad_norm": 0.32701927423477173, + "learning_rate": 2.08e-06, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0019, + "grad_norm": 0.31851011514663696, + "learning_rate": 2.07e-06, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0019, + "grad_norm": 0.3128160238265991, + "learning_rate": 2.06e-06, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0609, + "grad_norm": 1.4082930088043213, + "learning_rate": 2.05e-06, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0367, + "grad_norm": 1.014041781425476, + "learning_rate": 2.04e-06, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0018, + "grad_norm": 0.31275689601898193, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0651, + "grad_norm": 1.7855079174041748, + "learning_rate": 2.02e-06, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0019, + "grad_norm": 0.3344590663909912, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0647, + "grad_norm": 1.4787598848342896, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0578, + "grad_norm": 1.2822742462158203, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0431, + "grad_norm": 1.270432472229004, + "learning_rate": 1.98e-06, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0629, + "grad_norm": 1.4008212089538574, + "learning_rate": 1.97e-06, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.29254984855651855, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.33816665410995483, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0407, + "grad_norm": 1.2000517845153809, + "learning_rate": 1.94e-06, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0021, + "grad_norm": 0.36089253425598145, + "learning_rate": 1.93e-06, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009200990200043, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0681, + "grad_norm": 1.279045581817627, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.041, + "grad_norm": 0.9949601292610168, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0436, + "grad_norm": 1.0469834804534912, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.07, + "grad_norm": 1.9559322595596313, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.002, + "grad_norm": 0.34342578053474426, + "learning_rate": 1.87e-06, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.0878, + "grad_norm": 1.9412786960601807, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.002, + "grad_norm": 0.32897070050239563, + "learning_rate": 1.85e-06, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0558, + "grad_norm": 1.230363368988037, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0021, + "grad_norm": 0.36400625109672546, + "learning_rate": 1.83e-06, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0836, + "grad_norm": 2.0716917514801025, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0621, + "grad_norm": 1.304250717163086, + "learning_rate": 1.81e-06, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0021, + "grad_norm": 0.36326804757118225, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0021, + "grad_norm": 0.35329553484916687, + "learning_rate": 1.79e-06, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.37259048223495483, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0427, + "grad_norm": 1.4227620363235474, + "learning_rate": 1.77e-06, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.0019, + "grad_norm": 0.3209492564201355, + "learning_rate": 1.76e-06, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.0461, + "grad_norm": 1.0381195545196533, + "learning_rate": 1.75e-06, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.042, + "grad_norm": 1.2007672786712646, + "learning_rate": 1.74e-06, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0021, + "grad_norm": 0.36294040083885193, + "learning_rate": 1.73e-06, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0021, + "grad_norm": 0.36834561824798584, + "learning_rate": 1.72e-06, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0571, + "grad_norm": 1.3143699169158936, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0019, + "grad_norm": 0.3313964307308197, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.002, + "grad_norm": 0.357883095741272, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3507683277130127, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0019, + "grad_norm": 0.32915839552879333, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.055, + "grad_norm": 1.478965163230896, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0563, + "grad_norm": 1.0098392963409424, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0018, + "grad_norm": 0.30924662947654724, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0662, + "grad_norm": 1.276971459388733, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0018, + "grad_norm": 0.3022649586200714, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0019, + "grad_norm": 0.32340654730796814, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.038, + "grad_norm": 1.0054205656051636, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.0445, + "grad_norm": 1.2428219318389893, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0531, + "grad_norm": 1.1613452434539795, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0018, + "grad_norm": 0.2842133641242981, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0018, + "grad_norm": 0.3061327040195465, + "learning_rate": 1.56e-06, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0019, + "grad_norm": 0.31931373476982117, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0689, + "grad_norm": 1.777726650238037, + "learning_rate": 1.54e-06, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0626, + "grad_norm": 1.0839914083480835, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0621, + "grad_norm": 1.0777654647827148, + "learning_rate": 1.52e-06, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0617, + "grad_norm": 1.3572564125061035, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0019, + "grad_norm": 0.31615281105041504, + "learning_rate": 1.5e-06, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0584, + "grad_norm": 1.4089421033859253, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0376, + "grad_norm": 0.9989500641822815, + "learning_rate": 1.48e-06, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0564, + "grad_norm": 1.4619941711425781, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0017, + "grad_norm": 0.27881649136543274, + "learning_rate": 1.46e-06, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.0021, + "grad_norm": 0.3606109619140625, + "learning_rate": 1.45e-06, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0018, + "grad_norm": 0.3089398145675659, + "learning_rate": 1.44e-06, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.002, + "grad_norm": 0.35239994525909424, + "learning_rate": 1.43e-06, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.0434, + "grad_norm": 1.028780460357666, + "learning_rate": 1.42e-06, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.055, + "grad_norm": 1.3252202272415161, + "learning_rate": 1.41e-06, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.002, + "grad_norm": 0.34616848826408386, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0021, + "grad_norm": 0.345546156167984, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.041, + "grad_norm": 1.0742279291152954, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.0558, + "grad_norm": 1.3981537818908691, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.3480032682418823, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0414, + "grad_norm": 1.1904889345169067, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0019, + "grad_norm": 0.32626014947891235, + "learning_rate": 1.34e-06, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.0019, + "grad_norm": 0.3311507999897003, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.0417, + "grad_norm": 1.0487819910049438, + "learning_rate": 1.32e-06, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0612, + "grad_norm": 1.482262372970581, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0597, + "grad_norm": 1.0906400680541992, + "learning_rate": 1.3e-06, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0451, + "grad_norm": 1.3021650314331055, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0566, + "grad_norm": 1.1073824167251587, + "learning_rate": 1.28e-06, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0021, + "grad_norm": 0.366703599691391, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0402, + "grad_norm": 1.114858865737915, + "learning_rate": 1.26e-06, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0726, + "grad_norm": 1.9793658256530762, + "learning_rate": 1.25e-06, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0393, + "grad_norm": 1.212233066558838, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.002, + "grad_norm": 0.3448551893234253, + "learning_rate": 1.23e-06, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.002, + "grad_norm": 0.33576035499572754, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0662, + "grad_norm": 1.6050575971603394, + "learning_rate": 1.21e-06, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0361, + "grad_norm": 1.034451961517334, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0022, + "grad_norm": 0.3761736750602722, + "learning_rate": 1.19e-06, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0648, + "grad_norm": 1.8947163820266724, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0556, + "grad_norm": 1.317289113998413, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0441, + "grad_norm": 1.1064449548721313, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0895, + "grad_norm": 1.8790072202682495, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0824, + "grad_norm": 2.2661681175231934, + "learning_rate": 1.14e-06, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.08, + "grad_norm": 2.5085411071777344, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0382, + "grad_norm": 0.8821580410003662, + "learning_rate": 1.12e-06, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.0419, + "grad_norm": 1.2789467573165894, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0661, + "grad_norm": 1.2416129112243652, + "learning_rate": 1.1e-06, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0385, + "grad_norm": 1.19954514503479, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0803, + "grad_norm": 1.7022594213485718, + "learning_rate": 1.08e-06, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0651, + "grad_norm": 1.4528557062149048, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0647, + "grad_norm": 1.2057602405548096, + "learning_rate": 1.06e-06, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0609, + "grad_norm": 1.2766141891479492, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0437, + "grad_norm": 1.1985217332839966, + "learning_rate": 1.04e-06, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0571, + "grad_norm": 1.1973105669021606, + "learning_rate": 1.03e-06, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0664, + "grad_norm": 1.5751904249191284, + "learning_rate": 1.02e-06, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0436, + "grad_norm": 1.0939377546310425, + "learning_rate": 1.01e-06, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0031, + "grad_norm": 0.5472993850708008, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 650257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0595, + "grad_norm": 1.3305593729019165, + "learning_rate": 9.9e-07, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0391, + "grad_norm": 1.123191475868225, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0032, + "grad_norm": 0.5546753406524658, + "learning_rate": 9.7e-07, + "num_tokens": 651372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0031, + "grad_norm": 0.5491161942481995, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 651463.0, + "mean_token_accuracy": 1.0, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.0687, + "grad_norm": 2.234290599822998, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0586, + "grad_norm": 1.2323557138442993, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0557, + "grad_norm": 1.1316601037979126, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.0399, + "grad_norm": 1.354643702507019, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0032, + "grad_norm": 0.5774580836296082, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.2131, + "grad_norm": 5.501800537109375, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0552, + "grad_norm": 1.1691670417785645, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0571, + "grad_norm": 1.3334885835647583, + "learning_rate": 8.8e-07, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850784778594971, + "learning_rate": 8.7e-07, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0751, + "grad_norm": 2.8085896968841553, + "learning_rate": 8.6e-07, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0805, + "grad_norm": 1.9259722232818604, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0404, + "grad_norm": 1.23832106590271, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0566, + "grad_norm": 1.0702412128448486, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0608, + "grad_norm": 1.4386783838272095, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0592, + "grad_norm": 1.2550030946731567, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0434, + "grad_norm": 1.8757680654525757, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.2038, + "grad_norm": 4.9877095222473145, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0037, + "grad_norm": 0.6778392791748047, + "learning_rate": 7.8e-07, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.048, + "grad_norm": 1.6256376504898071, + "learning_rate": 7.7e-07, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.0561, + "grad_norm": 1.4658511877059937, + "learning_rate": 7.6e-07, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.071, + "grad_norm": 1.7589434385299683, + "learning_rate": 7.5e-07, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.0403, + "grad_norm": 1.2130093574523926, + "learning_rate": 7.4e-07, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599217891693115, + "learning_rate": 7.3e-07, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0584, + "grad_norm": 1.2125273942947388, + "learning_rate": 7.2e-07, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0039, + "grad_norm": 0.6885141730308533, + "learning_rate": 7.1e-07, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.056, + "grad_norm": 1.233972430229187, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.004, + "grad_norm": 0.7142868041992188, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0614, + "grad_norm": 1.4658222198486328, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0493, + "grad_norm": 1.051007866859436, + "learning_rate": 6.7e-07, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0409, + "grad_norm": 1.2317217588424683, + "learning_rate": 6.6e-07, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.004, + "grad_norm": 0.7169041633605957, + "learning_rate": 6.5e-07, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0393, + "grad_norm": 1.290911316871643, + "learning_rate": 6.4e-07, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.043, + "grad_norm": 1.550564169883728, + "learning_rate": 6.3e-07, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.044, + "grad_norm": 1.1559568643569946, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0761, + "grad_norm": 1.5238863229751587, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0479, + "grad_norm": 1.310771107673645, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0463, + "grad_norm": 1.120958924293518, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.0039, + "grad_norm": 0.6784827709197998, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0672, + "grad_norm": 1.386460542678833, + "learning_rate": 5.7e-07, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0453, + "grad_norm": 1.2751063108444214, + "learning_rate": 5.6e-07, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.062, + "grad_norm": 1.0763590335845947, + "learning_rate": 5.5e-07, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0571, + "grad_norm": 1.2678844928741455, + "learning_rate": 5.4e-07, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.004, + "grad_norm": 0.7198203802108765, + "learning_rate": 5.3e-07, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0442, + "grad_norm": 1.2891501188278198, + "learning_rate": 5.2e-07, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0039, + "grad_norm": 0.6999010443687439, + "learning_rate": 5.1e-07, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.004, + "grad_norm": 0.7249695658683777, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0781, + "grad_norm": 1.6599754095077515, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0038, + "grad_norm": 0.6885353922843933, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0568, + "grad_norm": 1.6591845750808716, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0038, + "grad_norm": 0.6629458069801331, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0553, + "grad_norm": 1.0831410884857178, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.065, + "grad_norm": 1.709847331047058, + "learning_rate": 4.4e-07, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0446, + "grad_norm": 1.2094167470932007, + "learning_rate": 4.3e-07, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0585, + "grad_norm": 1.23978853225708, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0039, + "grad_norm": 0.6842091083526611, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0603, + "grad_norm": 1.337598204612732, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.004, + "grad_norm": 0.7296668291091919, + "learning_rate": 3.9e-07, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0038, + "grad_norm": 0.6806443333625793, + "learning_rate": 3.8e-07, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0038, + "grad_norm": 0.6828562021255493, + "learning_rate": 3.7e-07, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0667, + "grad_norm": 1.748108148574829, + "learning_rate": 3.6e-07, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0386, + "grad_norm": 1.3246146440505981, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0038, + "grad_norm": 0.6706036329269409, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0552, + "grad_norm": 1.2772272825241089, + "learning_rate": 3.3e-07, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0596, + "grad_norm": 1.3164302110671997, + "learning_rate": 3.2e-07, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0582, + "grad_norm": 1.3520668745040894, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0547, + "grad_norm": 1.2490239143371582, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0387, + "grad_norm": 1.1652135848999023, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0626, + "grad_norm": 1.9845855236053467, + "learning_rate": 2.8e-07, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0038, + "grad_norm": 0.6789660453796387, + "learning_rate": 2.7e-07, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.0037, + "grad_norm": 0.678180456161499, + "learning_rate": 2.6e-07, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0038, + "grad_norm": 0.6906817555427551, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0516, + "grad_norm": 1.1001511812210083, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0037, + "grad_norm": 0.6647882461547852, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.0627, + "grad_norm": 1.4906483888626099, + "learning_rate": 2.2e-07, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0653, + "grad_norm": 1.6483995914459229, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0542, + "grad_norm": 1.1732497215270996, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0035, + "grad_norm": 0.6123244762420654, + "learning_rate": 1.9e-07, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0628, + "grad_norm": 3.3254270553588867, + "learning_rate": 1.8e-07, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.0409, + "grad_norm": 1.0730781555175781, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0034, + "grad_norm": 0.5923974514007568, + "learning_rate": 1.6e-07, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.033, + "grad_norm": 1.07072114944458, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0563, + "grad_norm": 1.1191027164459229, + "learning_rate": 1.4e-07, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0034, + "grad_norm": 0.6199093461036682, + "learning_rate": 1.3e-07, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0497, + "grad_norm": 1.2205955982208252, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0553, + "grad_norm": 1.2247557640075684, + "learning_rate": 1.1e-07, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.0615, + "grad_norm": 1.5119178295135498, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0036, + "grad_norm": 0.6369652152061462, + "learning_rate": 9e-08, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0409, + "grad_norm": 1.2765092849731445, + "learning_rate": 8e-08, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0446, + "grad_norm": 1.0794225931167603, + "learning_rate": 7e-08, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0037, + "grad_norm": 0.6602066159248352, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0637, + "grad_norm": 1.4354852437973022, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.0037, + "grad_norm": 0.6749649047851562, + "learning_rate": 4e-08, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518, + "epoch": 1.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..cf39b39eacfc4a0eb4375b757c1d2cdd829d1bbd --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 372.1845, + "train_loss": 0.18184852770145518, + "train_metrics": { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..67d00756e92a5f7b983ca1856d58db24059c3fad --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/submission_summary.json @@ -0,0 +1,376 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json new file mode 100644 index 0000000000000000000000000000000000000000..e9c6137a69670ce54039397c1c7dafcb577d19c1 --- /dev/null +++ b/docs/results/submission_evidence/qwen_0_5b_1_5b_3b/training_space_runtime_status.json @@ -0,0 +1,39 @@ +{ + "status": "ok", + "generated_at_utc": "2026-04-26T05:07:01.973345+00:00", + "space_id": "TheJackBright/polyguard-openenv-training-full", + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "runtime": "SpaceRuntime(stage='PAUSED', hardware=None, requested_hardware='cpu-basic', sleep_time=172800, storage=None, raw={'stage': 'PAUSED', 'hardware': {'current': None, 'requested': 'cpu-basic'}, 'gcTimeout': 172800, 'replicas': {'requested': 1}, 'devMode': False, 'domains': [{'domain': 'thejackbright-polyguard-openenv-training-full.hf.space', 'stage': 'READY'}]})", + "runtime_error": "", + "artifact_error": "", + "artifact_file_count": 83, + "has_usable_active_bundle": true, + "has_full_sweep_artifacts": false, + "run_statuses": { + "qwen-qwen2-5-0-5b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload", + "artifact_files": [] + }, + "qwen-qwen2-5-1-5b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload", + "artifact_files": [] + }, + "qwen-qwen2-5-3b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "not_seen_in_status", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status", + "artifact_files": [] + } + }, + "interpretation": "The Space is not actively training if runtime contains stage='PAUSED'. Completed stage records are taken from live evidence snapshots when available; missing per-run artifact files mean the full sweep checkpoints/reports are not yet downloadable." +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md b/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6dbe0f2a042ccfdb35eae53e5be1edd053c94b2c --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/README.md @@ -0,0 +1,59 @@ +# PolyGuard Submission Evidence: Qwen 0.5B and 1.5B + +This folder is generated without retraining. It uses already completed HF Space status, local mirrored sweep artifacts, and deterministic PolyGuard verifier rollouts. + +## Run Status + +| Model | SFT training | GRPO training | SFT loss | SFT verifier reward | SFT latency | +| --- | --- | --- | ---: | ---: | ---: | +| Qwen 0.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1923 | 0.726 | 1.839s | +| Qwen 1.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1152 | 0.726 | 2.158s | + +## Basic LLM vs Full PolyGuard Pipeline + +- Judge: `PolyGuard verifier/reward system`. +- Matched seeds: `8`. +- Pipeline minus basic average reward delta: `0.043`. +- LLM-as-judge is optional and disabled unless `POLYGUARD_ENABLE_LLM_JUDGE=true`. + +## Pending Items + +- Qwen 0.5B grpo_history.json: pending_artifact_upload +- Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 1.5B grpo_history.json: pending_artifact_upload +- Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload + +## Generated Charts + +- `qwen_0_5b_sft_training_loss.png` +- `qwen_0_5b_sft_token_accuracy.png` +- `qwen_0_5b_sft_learning_rate.png` +- `qwen_1_5b_sft_training_loss.png` +- `qwen_1_5b_sft_token_accuracy.png` +- `qwen_1_5b_sft_learning_rate.png` +- `qwen_0_5b_vs_1_5b_sft_loss_comparison.png` +- `qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png` +- `qwen_0_5b_1_5b_final_sft_train_loss.png` +- `qwen_0_5b_1_5b_postsave_reward.png` +- `qwen_0_5b_1_5b_postsave_latency.png` +- `qwen_0_5b_1_5b_sft_runtime.png` +- `qwen_0_5b_1_5b_remote_completed_stage_durations.png` +- `policy_ablation_avg_reward.png` +- `policy_ablation_legality.png` +- `policy_ablation_exploit_detection.png` +- `reward_component_bars.png` +- `primary_reward_channel_bars.png` +- `basic_llm_vs_full_pipeline_reward.png` +- `basic_llm_vs_full_pipeline_legality.png` +- `basic_llm_vs_full_pipeline_latency.png` +- `basic_llm_vs_full_pipeline_reward_delta_by_seed.png` + +## Important Honesty Note + +Remote-completed stages and uploaded artifact files are tracked separately. If a GRPO run completed on the HF Space but the per-run GRPO history file has not been uploaded yet, this bundle labels it as `remote_completed_pending_artifact_upload` instead of inventing a curve. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..30df76ac40b24370c4d47f38a5b392e8e7c8b36f Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_latency.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..354ee4f38019cfceb7db848c00ee7bda6270c162 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_legality.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a334d8db37904ac9ab47a582cd1efb83545a7027 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..5d068d5f289f2e688017d55fba2219c1d0154167 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b8a16a69c129c24b20c8ab712e219662b853e8ac Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..b02893a92db120bde2f2a629c680c7191230edeb Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_exploit_detection.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..a084c777866c2316a63e3ab9a6339d45606517a5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/policy_ablation_legality.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2b33f8c40f985870bbf6ad986307cf9988ae229d Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/primary_reward_channel_bars.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e624303fbcd1dcbc7e67edb578055310873bc7ad Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..d5d8d458cfe55b068060be5cbed93d4f3ea2e15f Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..eaf9687f4bd8f1fddf41434e8317105634a2366a Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..db33a7a97a9a7470e3927df08f1b2c61a5331e05 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..692ae055aa330d28ddecde01f82d2e0fb984de79 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..ffd982a07fec0d80dff092afea033c65d3a06552 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..91f0c0075c563b6915e2f8225a659d9f88c08bc8 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee344753fde4ea2476b340dbf618a9b12b1f94c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..15a7de44aa9ec407cb7a8647624a67edb8bb38c6 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..d36b471da2f0902e2c513e98a16098be6ec9a515 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..a8de709d9201c4d7a4fb502d3045104c0a8017a5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..642d57b9cb8a88d2a602adcbc92e220df2fc1c6c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c72e897e7360ab9ceaafaaf36dd867414c0694d9 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/qwen_1_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0b417999883105867eebe93b2fdb8bbdaf4b43 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbe17a795d04470e938101377019eadd6246670049fc717149bbe6d28888bae +size 142092 diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..9ee2415b64aa6d1e4357754bd432cfc43dbf5091 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/anti_cheat_failure_rates.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..edb2fa8c25074d88c90bce5c243af90dcb28e1c6 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b8b1c8d550e72424ffeef18cd8fff38ce8c91cab Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/grpo_reward_curves.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..0fb4d13ec904f9d31e23bc155fe571425145913c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/inference_latency_validity.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b4c1e418b0262902ad1c9ad4818f4d9b22a152d0 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/legality_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b28dc57ac180e83b38194b17251e3cf3a5a941da Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/policy_stack_avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4b35e432d6d777827f6bf0dc189bfc74b4427125 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_grpo_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1ec58084d2c79f340541654e5d99906a3ae592ac Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2773c4f16e553eeffc43c9ef348a988b77735c52 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/qwen_model_sft_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fc18c8433fb28860795036a1aab24f9aa05f61af Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/reward_component_bars.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..8d5bf10a57fdc8264485616fd51d637f0709f104 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_loss_curves.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4765e95fbbc1f1ed2f8a6686909241a75486caa5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/sft_vs_grpo_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..3bf8436ec672a1cb1875c178b9369e85e5aca2e8 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b/charts/local_available_combined/train_holdout_gap.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..fb9aa967b6aba73ae13fe8bf2e2bc9953aa17ab0 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/manifest.json @@ -0,0 +1,237 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system", + "bundle_zip": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/qwen_0_5b_1_5b_evidence.zip", + "mirrored_file_count": 56 +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6dbe0f2a042ccfdb35eae53e5be1edd053c94b2c --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/README.md @@ -0,0 +1,59 @@ +# PolyGuard Submission Evidence: Qwen 0.5B and 1.5B + +This folder is generated without retraining. It uses already completed HF Space status, local mirrored sweep artifacts, and deterministic PolyGuard verifier rollouts. + +## Run Status + +| Model | SFT training | GRPO training | SFT loss | SFT verifier reward | SFT latency | +| --- | --- | --- | ---: | ---: | ---: | +| Qwen 0.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1923 | 0.726 | 1.839s | +| Qwen 1.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1152 | 0.726 | 2.158s | + +## Basic LLM vs Full PolyGuard Pipeline + +- Judge: `PolyGuard verifier/reward system`. +- Matched seeds: `8`. +- Pipeline minus basic average reward delta: `0.043`. +- LLM-as-judge is optional and disabled unless `POLYGUARD_ENABLE_LLM_JUDGE=true`. + +## Pending Items + +- Qwen 0.5B grpo_history.json: pending_artifact_upload +- Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 1.5B grpo_history.json: pending_artifact_upload +- Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload + +## Generated Charts + +- `qwen_0_5b_sft_training_loss.png` +- `qwen_0_5b_sft_token_accuracy.png` +- `qwen_0_5b_sft_learning_rate.png` +- `qwen_1_5b_sft_training_loss.png` +- `qwen_1_5b_sft_token_accuracy.png` +- `qwen_1_5b_sft_learning_rate.png` +- `qwen_0_5b_vs_1_5b_sft_loss_comparison.png` +- `qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png` +- `qwen_0_5b_1_5b_final_sft_train_loss.png` +- `qwen_0_5b_1_5b_postsave_reward.png` +- `qwen_0_5b_1_5b_postsave_latency.png` +- `qwen_0_5b_1_5b_sft_runtime.png` +- `qwen_0_5b_1_5b_remote_completed_stage_durations.png` +- `policy_ablation_avg_reward.png` +- `policy_ablation_legality.png` +- `policy_ablation_exploit_detection.png` +- `reward_component_bars.png` +- `primary_reward_channel_bars.png` +- `basic_llm_vs_full_pipeline_reward.png` +- `basic_llm_vs_full_pipeline_legality.png` +- `basic_llm_vs_full_pipeline_latency.png` +- `basic_llm_vs_full_pipeline_reward_delta_by_seed.png` + +## Important Honesty Note + +Remote-completed stages and uploaded artifact files are tracked separately. If a GRPO run completed on the HF Space but the per-run GRPO history file has not been uploaded yet, this bundle labels it as `remote_completed_pending_artifact_upload` instead of inventing a curve. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..59db0c703e99a0a76c10f9d2b48c15ab8e71f5c4 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/action_traces.jsonl @@ -0,0 +1,24 @@ +{"seed": 8000, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0218, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "sft_policy", "reward": 0.803, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.842, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.657, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.803}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.657, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 3.0834, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8001, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "sft_policy", "reward": 0.755, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.518, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.549, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.755}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8002, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8003, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0024, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8004, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8005, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0022, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8006, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0023, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8007, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0022, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1b2faf9c8218a4e723aaac00e7a7f2cddf0538 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/artifact_repo_listing.json @@ -0,0 +1,9 @@ +{ + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md new file mode 100644 index 0000000000000000000000000000000000000000..d520a446c99c01d6446abc8c937157e54f669684 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_failure_cases.md @@ -0,0 +1,43 @@ +# Basic LLM vs PolyGuard Failure Cases + +## Seed 8000 + +- Baseline attempt: candidate `cand_01`, reward `0.717`. +- PolyGuard pipeline attempt: candidate `cand_03`, reward `0.804`. +- Measured reward delta: `0.087`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8004 + +- Baseline attempt: candidate `cand_01`, reward `0.717`. +- PolyGuard pipeline attempt: candidate `cand_03`, reward `0.804`. +- Measured reward delta: `0.087`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8001 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8003 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8005 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8006 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json new file mode 100644 index 0000000000000000000000000000000000000000..32d4f98fc269daee5221d67244ea0c995322747f --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/basic_llm_vs_polyguard_report.json @@ -0,0 +1,133 @@ +{ + "status": "ok", + "judge": "PolyGuard verifier/reward system", + "llm_as_judge": false, + "matched_seeds": [ + 8000, + 8001, + 8002, + 8003, + 8004, + 8005, + 8006, + 8007 + ], + "summaries": { + "basic_llm": { + "episodes": 8, + "avg_reward": 0.762, + "avg_latency_seconds": 0.0038, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.25, + "candidate_diversity": 1 + }, + "sft_policy": { + "episodes": 8, + "avg_reward": 0.818, + "avg_latency_seconds": 0.0012, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + }, + "full_polyguard_pipeline": { + "episodes": 8, + "avg_reward": 0.805, + "avg_latency_seconds": 0.3876, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + } + }, + "pipeline_minus_basic_reward_delta": 0.043, + "deltas": [ + { + "seed": 8000, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8001, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8002, + "basic_reward": 0.777, + "pipeline_reward": 0.804, + "reward_delta": 0.027, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8003, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8004, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8005, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8006, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8007, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + } + ], + "notes": [ + "basic_llm is an evaluation-only prompt-style proxy that selects the first legal candidate without verifier reranking.", + "sft_policy is an evaluation-only SFT-style safety ranker over the same candidate set.", + "full_polyguard_pipeline runs the orchestrated LLM+bandit stack and scores through the same verifier." + ] +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json new file mode 100644 index 0000000000000000000000000000000000000000..adec7032d7fae6ba4ca73ed347e0176c38aa961f --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/hf_status_snapshot.json @@ -0,0 +1,311 @@ +{ + "status": "running", + "started_at": 1777162756.623835, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.577 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.86 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 736.955 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "log_tail": "\u2588\u2588\u2588\u2588\u2588\u258a| 1965/2000 [11:41<00:10, 3.22it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n \n{'loss': 0.0449, 'grad_norm': 0.8585970401763916, 'learning_rate': 3.7e-07, 'num_tokens': 1350951.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n \n{'loss': 0.0518, 'grad_norm': 0.7478350400924683, 'learning_rate': 3.6e-07, 'num_tokens': 1351975.0, 'mean_token_accuracy': 0.9755381345748901, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n \n{'loss': 0.0442, 'grad_norm': 0.8791924715042114, 'learning_rate': 3.5000000000000004e-07, 'num_tokens': 1352578.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n \n{'loss': 0.0488, 'grad_norm': 0.6195839047431946, 'learning_rate': 3.4000000000000003e-07, 'num_tokens': 1353602.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n \n{'loss': 0.0047, 'grad_norm': 0.8639671802520752, 'learning_rate': 3.3e-07, 'num_tokens': 1353784.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n \n{'loss': 0.0048, 'grad_norm': 0.8560010194778442, 'learning_rate': 3.2e-07, 'num_tokens': 1353966.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n \n{'loss': 0.0382, 'grad_norm': 0.8542295694351196, 'learning_rate': 3.1000000000000005e-07, 'num_tokens': 1354990.0, 'mean_token_accuracy': 0.9823874831199646, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n \n{'loss': 0.033, 'grad_norm': 0.7632898688316345, 'learning_rate': 3.0000000000000004e-07, 'num_tokens': 1355593.0, 'mean_token_accuracy': 0.9833610653877258, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n \n{'loss': 0.0582, 'grad_norm': 0.7546073198318481, 'learning_rate': 2.9000000000000003e-07, 'num_tokens': 1356617.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n \n{'loss': 0.0607, 'grad_norm': 0.9100231528282166, 'learning_rate': 2.8e-07, 'num_tokens': 1357641.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n \n{'loss': 0.0522, 'grad_norm': 0.9831849932670593, 'learning_rate': 2.7e-07, 'num_tokens': 1358665.0, 'mean_token_accuracy': 0.9726027250289917, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n \n{'loss': 0.0455, 'grad_norm': 0.7770227789878845, 'learning_rate': 2.6e-07, 'num_tokens': 1359268.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n \n{'loss': 0.043, 'grad_norm': 0.9285680055618286, 'learning_rate': 2.5000000000000004e-07, 'num_tokens': 1359871.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n \n{'loss': 0.0475, 'grad_norm': 0.725820004940033, 'learning_rate': 2.4000000000000003e-07, 'num_tokens': 1360895.0, 'mean_token_accuracy': 0.9784736037254333, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n \n{'loss': 0.0523, 'grad_norm': 0.9508711099624634, 'learning_rate': 2.3000000000000002e-07, 'num_tokens': 1361498.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n \n{'loss': 0.0461, 'grad_norm': 0.9076665639877319, 'learning_rate': 2.2e-07, 'num_tokens': 1362101.0, 'mean_token_accuracy': 0.980033278465271, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n \n{'loss': 0.0049, 'grad_norm': 0.8733372092247009, 'learning_rate': 2.1000000000000003e-07, 'num_tokens': 1362283.0, 'mean_token_accuracy': 1.0, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n \n{'loss': 0.0499, 'grad_norm': 1.0219769477844238, 'learning_rate': 2.0000000000000002e-07, 'num_tokens': 1362886.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n \n{'loss': 0.047, 'grad_norm': 0.6855125427246094, 'learning_rate': 1.9e-07, 'num_tokens': 1363910.0, 'mean_token_accuracy': 0.9794520735740662, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n \n{'loss': 0.053, 'grad_norm': 0.9592626094818115, 'learning_rate': 1.8e-07, 'num_tokens': 1364513.0, 'mean_token_accuracy': 0.9717137813568115, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n \n{'loss': 0.0634, 'grad_norm': 0.9822715520858765, 'learning_rate': 1.7000000000000001e-07, 'num_tokens': 1365537.0, 'mean_token_accuracy': 0.9696673154830933, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n \n{'loss': 0.005, 'grad_norm': 0.9051101207733154, 'learning_rate': 1.6e-07, 'num_tokens': 1365719.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n \n{'loss': 0.057, 'grad_norm': 0.7732815742492676, 'learning_rate': 1.5000000000000002e-07, 'num_tokens': 1366743.0, 'mean_token_accuracy': 0.9716242551803589, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n \n{'loss': 0.0488, 'grad_norm': 1.0130807161331177, 'learning_rate': 1.4e-07, 'num_tokens': 1367346.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n \n{'loss': 0.0502, 'grad_norm': 0.7733030319213867, 'learning_rate': 1.3e-07, 'num_tokens': 1368370.0, 'mean_token_accuracy': 0.976516604423523, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n \n{'loss': 0.033, 'grad_norm': 0.8099549412727356, 'learning_rate': 1.2000000000000002e-07, 'num_tokens': 1368973.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n \n{'loss': 0.0505, 'grad_norm': 0.8513318300247192, 'learning_rate': 1.1e-07, 'num_tokens': 1369576.0, 'mean_token_accuracy': 0.9733777046203613, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n \n{'loss': 0.0471, 'grad_norm': 0.8666603565216064, 'learning_rate': 1.0000000000000001e-07, 'num_tokens': 1370179.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n \n{'loss': 0.0046, 'grad_norm': 0.8277124166488647, 'learning_rate': 9e-08, 'num_tokens': 1370361.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n \n{'loss': 0.0491, 'grad_norm': 0.7712334990501404, 'learning_rate': 8e-08, 'num_tokens': 1370964.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n \n{'loss': 0.037, 'grad_norm': 0.8775883316993713, 'learning_rate': 7e-08, 'num_tokens': 1371988.0, 'mean_token_accuracy': 0.980430543422699, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n \n{'loss': 0.0377, 'grad_norm': 0.7055721282958984, 'learning_rate': 6.000000000000001e-08, 'num_tokens': 1373012.0, 'mean_token_accuracy': 0.9814090132713318, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n \n{'loss': 0.005, 'grad_norm': 0.8954693675041199, 'learning_rate': 5.0000000000000004e-08, 'num_tokens': 1373194.0, 'mean_token_accuracy': 1.0, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n \n{'loss': 0.0314, 'grad_norm': 0.7444577217102051, 'learning_rate': 4e-08, 'num_tokens': 1373797.0, 'mean_token_accuracy': 0.9883527159690857, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'loss': 0.0525, 'grad_norm': 1.007545828819275, 'learning_rate': 3.0000000000000004e-08, 'num_tokens': 1374400.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'train_runtime': 714.3473, 'train_samples_per_second': 5.6, 'train_steps_per_second': 2.8, 'train_loss': 0.1561080440459773, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.80it/s]\nsft_trl_done\n$ python scripts/train_grpo_trl.py --model-id Qwen/Qwen2.5-3B-Instruct --prompts-path data/processed/training_corpus_grpo_prompts.jsonl --output-dir checkpoints/sweeps/qwen-qwen2-5-3b-instruct --report-path outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json --max-prompts 0 --max-steps 0 --epochs 1.0 --batch-size 2 --grad-accum 1 --num-generations 2 --max-prompt-length 384 --max-completion-length 64 --learning-rate 1e-06 --use-unsloth\n" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..559a39eee196526b0c832f9689a667397f11b61a --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/manifest.json @@ -0,0 +1,235 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..17f42d1ba8e5ed4aaf91fc331e9057d45b539b10 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/policy_ablation_report.json @@ -0,0 +1,150 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + }, + "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/grpo_ablation_report.json" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json new file mode 100644 index 0000000000000000000000000000000000000000..26352611eeab0bb07b964c76298f3fa0f542711b --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/remote_stage_records.json @@ -0,0 +1,92 @@ +[ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } +] diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..d10ae0a2d52e93bf7afd4fe5560708fd2cc8e794 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..f2d95c49345fee0c966ee899582d2fc611158764 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..559a39eee196526b0c832f9689a667397f11b61a --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/reports/submission_summary.json @@ -0,0 +1,235 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json b/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..559a39eee196526b0c832f9689a667397f11b61a --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/submission_summary.json @@ -0,0 +1,235 @@ +{ + "status": "ok", + "generated_at_unix": 1777179035.763374, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "pending_artifact_upload", + "files": [ + ".gitattributes" + ], + "meaningful_file_count": 0, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png", + "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png", + "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl b/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..59db0c703e99a0a76c10f9d2b48c15ab8e71f5c4 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b/traces/action_traces.jsonl @@ -0,0 +1,24 @@ +{"seed": 8000, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0218, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "sft_policy", "reward": 0.803, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.842, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.657, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.803}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.657, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 3.0834, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8001, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "sft_policy", "reward": 0.755, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.518, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.549, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.755}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8002, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8003, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0024, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8004, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8005, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0022, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8006, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0023, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8007, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0022, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ffebcc31e505d6332119d38493bee487bd979a51 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/README.md @@ -0,0 +1,80 @@ +# PolyGuard Submission Evidence: Qwen 0.5B, 1.5B, and 3B + +This folder is generated without retraining. It uses already completed HF Space status, local mirrored sweep artifacts, and deterministic PolyGuard verifier rollouts. + +## Current HF Training Monitor + +- Training Space: `https://huggingface.co/spaces/TheJackBright/polyguard-openenv-training-full` +- Artifact repo: `https://huggingface.co/TheJackBright/polyguard-openenv-training-full-artifacts` +- Latest runtime report: `reports/training_space_runtime_status.json` +- Current HF runtime stage: `PAUSED` +- Requested hardware now: `cpu-basic` +- Full per-run sweep artifacts downloadable: `false` +- Usable active model bundle downloadable: `true` + +Interpretation: Qwen 0.5B and 1.5B have remote-completed GRPO status records, but their per-run GRPO files are still pending artifact upload. Qwen 3B has SFT artifacts available locally, but GRPO completion is not recorded in the status evidence, and the Space is no longer actively running A10G training. + +## Run Status + +| Model | SFT training | GRPO training | SFT loss | SFT verifier reward | SFT latency | +| --- | --- | --- | ---: | ---: | ---: | +| Qwen 0.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1923 | 0.726 | 1.839s | +| Qwen 1.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1152 | 0.726 | 2.158s | +| Qwen 3B | artifact_available | not_seen_in_status | 0.1818 | 0.762 | 2.748s | + +## Basic LLM vs Full PolyGuard Pipeline + +- Judge: `PolyGuard verifier/reward system`. +- Matched seeds: `8`. +- Pipeline minus basic average reward delta: `0.043`. +- LLM-as-judge is optional and disabled unless `POLYGUARD_ENABLE_LLM_JUDGE=true`. + +## Pending Items + +- Qwen 0.5B grpo_history.json: pending_artifact_upload +- Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 1.5B grpo_history.json: pending_artifact_upload +- Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 3B grpo_history.json: pending_artifact_upload +- Qwen 3B grpo_postsave_inference: not_seen_in_status +- Qwen 3B grpo_training: not_seen_in_status +- Qwen 3B policy_ablation: not_seen_in_status +- Qwen 3B postsave_inference_grpo.json: pending_artifact_upload + +## Generated Charts + +- `qwen_0_5b_sft_training_loss.png` +- `qwen_0_5b_sft_token_accuracy.png` +- `qwen_0_5b_sft_learning_rate.png` +- `qwen_1_5b_sft_training_loss.png` +- `qwen_1_5b_sft_token_accuracy.png` +- `qwen_1_5b_sft_learning_rate.png` +- `qwen-qwen2-5-3b-instruct_sft_training_loss.png` +- `qwen-qwen2-5-3b-instruct_sft_token_accuracy.png` +- `qwen-qwen2-5-3b-instruct_sft_learning_rate.png` +- `qwen_0_5b_vs_1_5b_sft_loss_comparison.png` +- `qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png` +- `qwen_0_5b_1_5b_final_sft_train_loss.png` +- `qwen_0_5b_1_5b_postsave_reward.png` +- `qwen_0_5b_1_5b_postsave_latency.png` +- `qwen_0_5b_1_5b_sft_runtime.png` +- `qwen_0_5b_1_5b_remote_completed_stage_durations.png` +- `policy_ablation_avg_reward.png` +- `policy_ablation_legality.png` +- `policy_ablation_exploit_detection.png` +- `reward_component_bars.png` +- `primary_reward_channel_bars.png` +- `basic_llm_vs_full_pipeline_reward.png` +- `basic_llm_vs_full_pipeline_legality.png` +- `basic_llm_vs_full_pipeline_latency.png` +- `basic_llm_vs_full_pipeline_reward_delta_by_seed.png` + +## Important Honesty Note + +Remote-completed stages and uploaded artifact files are tracked separately. If a GRPO run completed on the HF Space but the per-run GRPO history file has not been uploaded yet, this bundle labels it as `remote_completed_pending_artifact_upload` instead of inventing a curve. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..0f7093d3dc5b03c1710e6cd800244e1f0c3d6f0c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_latency.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..354ee4f38019cfceb7db848c00ee7bda6270c162 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_legality.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..a334d8db37904ac9ab47a582cd1efb83545a7027 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png new file mode 100644 index 0000000000000000000000000000000000000000..5d068d5f289f2e688017d55fba2219c1d0154167 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/basic_llm_vs_full_pipeline_reward_delta_by_seed.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b8a16a69c129c24b20c8ab712e219662b853e8ac Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png new file mode 100644 index 0000000000000000000000000000000000000000..b02893a92db120bde2f2a629c680c7191230edeb Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_exploit_detection.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png new file mode 100644 index 0000000000000000000000000000000000000000..a084c777866c2316a63e3ab9a6339d45606517a5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/policy_ablation_legality.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2b33f8c40f985870bbf6ad986307cf9988ae229d Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/primary_reward_channel_bars.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..eeaee74949d469af50bcf55e1d66b8847e491f78 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..176b10578333a39d8ea7e5a324635821effc2343 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..82738c12da437f5bad55185490b0f85bbbf2b40d Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen-qwen2-5-3b-instruct_sft_training_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b0ac61084306b4eb2130df9f58696d2980c3f96f Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_final_sft_train_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..b9d1dcdb391fd27ab28296ac3874fb7ff02b5633 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_latency.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..9c3af01d6fb94de66e47a204bfe5a545edd93330 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_postsave_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png new file mode 100644 index 0000000000000000000000000000000000000000..47db263568828b5cee9fe01e3a103dad716e063d Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_remote_completed_stage_durations.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..297e6547bd5e074ff09271eee72d670824892595 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_1_5b_sft_runtime.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..ffd982a07fec0d80dff092afea033c65d3a06552 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..91f0c0075c563b6915e2f8225a659d9f88c08bc8 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee344753fde4ea2476b340dbf618a9b12b1f94c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..2118ea2b4b2a5dee26ac5177eb0e2ae2bbd48bce Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_loss_comparison.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..2782296497a7a8b3c5134a67aafb5b288e0113dd Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..a8de709d9201c4d7a4fb502d3045104c0a8017a5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_learning_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..642d57b9cb8a88d2a602adcbc92e220df2fc1c6c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_token_accuracy.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c72e897e7360ab9ceaafaaf36dd867414c0694d9 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/qwen_1_5b_sft_training_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..2f0b417999883105867eebe93b2fdb8bbdaf4b43 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbe17a795d04470e938101377019eadd6246670049fc717149bbe6d28888bae +size 142092 diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png new file mode 100644 index 0000000000000000000000000000000000000000..9ee2415b64aa6d1e4357754bd432cfc43dbf5091 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/anti_cheat_failure_rates.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..edb2fa8c25074d88c90bce5c243af90dcb28e1c6 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..b8b1c8d550e72424ffeef18cd8fff38ce8c91cab Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/grpo_reward_curves.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png new file mode 100644 index 0000000000000000000000000000000000000000..0fb4d13ec904f9d31e23bc155fe571425145913c Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/inference_latency_validity.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b4c1e418b0262902ad1c9ad4818f4d9b22a152d0 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/legality_rate.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..b28dc57ac180e83b38194b17251e3cf3a5a941da Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/policy_stack_avg_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4b35e432d6d777827f6bf0dc189bfc74b4427125 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_grpo_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1ec58084d2c79f340541654e5d99906a3ae592ac Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_loss.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..2773c4f16e553eeffc43c9ef348a988b77735c52 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/qwen_model_sft_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png new file mode 100644 index 0000000000000000000000000000000000000000..fc18c8433fb28860795036a1aab24f9aa05f61af Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/reward_component_bars.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png new file mode 100644 index 0000000000000000000000000000000000000000..8d5bf10a57fdc8264485616fd51d637f0709f104 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_loss_curves.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..4765e95fbbc1f1ed2f8a6686909241a75486caa5 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/sft_vs_grpo_reward.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..3bf8436ec672a1cb1875c178b9369e85e5aca2e8 Binary files /dev/null and b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/local_available_combined/train_holdout_gap.png differ diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..3da0dfffbe111a4157d841c447612b8e57a82adc --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/manifest.json @@ -0,0 +1,378 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system", + "bundle_zip": "submission_bundle/qwen_0_5b_1_5b_3b_evidence.zip", + "mirrored_file_count": 64 +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4b5588f79b8fe61962621ec97569c6a18d561b67 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/README.md @@ -0,0 +1,68 @@ +# PolyGuard Submission Evidence: Qwen 0.5B and 1.5B + +This folder is generated without retraining. It uses already completed HF Space status, local mirrored sweep artifacts, and deterministic PolyGuard verifier rollouts. + +## Run Status + +| Model | SFT training | GRPO training | SFT loss | SFT verifier reward | SFT latency | +| --- | --- | --- | ---: | ---: | ---: | +| Qwen 0.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1923 | 0.726 | 1.839s | +| Qwen 1.5B | artifact_available | remote_completed_pending_artifact_upload | 0.1152 | 0.726 | 2.158s | +| Qwen 3B | artifact_available | not_seen_in_status | 0.1818 | 0.762 | 2.748s | + +## Basic LLM vs Full PolyGuard Pipeline + +- Judge: `PolyGuard verifier/reward system`. +- Matched seeds: `8`. +- Pipeline minus basic average reward delta: `0.043`. +- LLM-as-judge is optional and disabled unless `POLYGUARD_ENABLE_LLM_JUDGE=true`. + +## Pending Items + +- Qwen 0.5B grpo_history.json: pending_artifact_upload +- Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 1.5B grpo_history.json: pending_artifact_upload +- Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload +- Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload +- Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload +- Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload +- Qwen 3B grpo_history.json: pending_artifact_upload +- Qwen 3B grpo_postsave_inference: not_seen_in_status +- Qwen 3B grpo_training: not_seen_in_status +- Qwen 3B policy_ablation: not_seen_in_status +- Qwen 3B postsave_inference_grpo.json: pending_artifact_upload + +## Generated Charts + +- `qwen_0_5b_sft_training_loss.png` +- `qwen_0_5b_sft_token_accuracy.png` +- `qwen_0_5b_sft_learning_rate.png` +- `qwen_1_5b_sft_training_loss.png` +- `qwen_1_5b_sft_token_accuracy.png` +- `qwen_1_5b_sft_learning_rate.png` +- `qwen-qwen2-5-3b-instruct_sft_training_loss.png` +- `qwen-qwen2-5-3b-instruct_sft_token_accuracy.png` +- `qwen-qwen2-5-3b-instruct_sft_learning_rate.png` +- `qwen_0_5b_vs_1_5b_sft_loss_comparison.png` +- `qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png` +- `qwen_0_5b_1_5b_final_sft_train_loss.png` +- `qwen_0_5b_1_5b_postsave_reward.png` +- `qwen_0_5b_1_5b_postsave_latency.png` +- `qwen_0_5b_1_5b_sft_runtime.png` +- `qwen_0_5b_1_5b_remote_completed_stage_durations.png` +- `policy_ablation_avg_reward.png` +- `policy_ablation_legality.png` +- `policy_ablation_exploit_detection.png` +- `reward_component_bars.png` +- `primary_reward_channel_bars.png` +- `basic_llm_vs_full_pipeline_reward.png` +- `basic_llm_vs_full_pipeline_legality.png` +- `basic_llm_vs_full_pipeline_latency.png` +- `basic_llm_vs_full_pipeline_reward_delta_by_seed.png` + +## Important Honesty Note + +Remote-completed stages and uploaded artifact files are tracked separately. If a GRPO run completed on the HF Space but the per-run GRPO history file has not been uploaded yet, this bundle labels it as `remote_completed_pending_artifact_upload` instead of inventing a curve. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..87a57447f1bc35b3d3352ec244b958c47a92d06b --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/action_traces.jsonl @@ -0,0 +1,24 @@ +{"seed": 8000, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0261, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "sft_policy", "reward": 0.803, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.842, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.657, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.803}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.657, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 3.9969, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8001, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "sft_policy", "reward": 0.755, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.518, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.549, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.755}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0036, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8002, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0024, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8003, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0028, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8004, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8005, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0017, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.003, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8006, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0028, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8007, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0023, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json new file mode 100644 index 0000000000000000000000000000000000000000..5f23072480e95f65785211fc47071cef6078b859 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/artifact_repo_listing.json @@ -0,0 +1,91 @@ +{ + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md new file mode 100644 index 0000000000000000000000000000000000000000..d520a446c99c01d6446abc8c937157e54f669684 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_failure_cases.md @@ -0,0 +1,43 @@ +# Basic LLM vs PolyGuard Failure Cases + +## Seed 8000 + +- Baseline attempt: candidate `cand_01`, reward `0.717`. +- PolyGuard pipeline attempt: candidate `cand_03`, reward `0.804`. +- Measured reward delta: `0.087`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8004 + +- Baseline attempt: candidate `cand_01`, reward `0.717`. +- PolyGuard pipeline attempt: candidate `cand_03`, reward `0.804`. +- Measured reward delta: `0.087`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8001 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8003 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8005 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. + +## Seed 8006 + +- Baseline attempt: candidate `cand_01`, reward `0.777`. +- PolyGuard pipeline attempt: candidate `cand_05`, reward `0.806`. +- Measured reward delta: `0.029`. +- Safeguard: every selected action is re-scored by the legality gate, anti-cheat checks, and decomposed clinical/process reward channels. diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json new file mode 100644 index 0000000000000000000000000000000000000000..0e50fc2cc335c77af3fcf4dde5e9e15b2927fcb8 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/basic_llm_vs_polyguard_report.json @@ -0,0 +1,133 @@ +{ + "status": "ok", + "judge": "PolyGuard verifier/reward system", + "llm_as_judge": false, + "matched_seeds": [ + 8000, + 8001, + 8002, + 8003, + 8004, + 8005, + 8006, + 8007 + ], + "summaries": { + "basic_llm": { + "episodes": 8, + "avg_reward": 0.762, + "avg_latency_seconds": 0.0044, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.25, + "candidate_diversity": 1 + }, + "sft_policy": { + "episodes": 8, + "avg_reward": 0.818, + "avg_latency_seconds": 0.0012, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + }, + "full_polyguard_pipeline": { + "episodes": 8, + "avg_reward": 0.805, + "avg_latency_seconds": 0.5021, + "legality_rate": 1.0, + "exploit_or_failure_rate": 0.0, + "candidate_diversity": 2 + } + }, + "pipeline_minus_basic_reward_delta": 0.043, + "deltas": [ + { + "seed": 8000, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8001, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8002, + "basic_reward": 0.777, + "pipeline_reward": 0.804, + "reward_delta": 0.027, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8003, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8004, + "basic_reward": 0.717, + "pipeline_reward": 0.804, + "reward_delta": 0.087, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_03", + "basic_failure_reasons": [ + "holdout_ddi_not_addressed" + ], + "pipeline_failure_reasons": [] + }, + { + "seed": 8005, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8006, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + }, + { + "seed": 8007, + "basic_reward": 0.777, + "pipeline_reward": 0.806, + "reward_delta": 0.029, + "basic_candidate_id": "cand_01", + "pipeline_candidate_id": "cand_05", + "basic_failure_reasons": [], + "pipeline_failure_reasons": [] + } + ], + "notes": [ + "basic_llm is an evaluation-only prompt-style proxy that selects the first legal candidate without verifier reranking.", + "sft_policy is an evaluation-only SFT-style safety ranker over the same candidate set.", + "full_polyguard_pipeline runs the orchestrated LLM+bandit stack and scores through the same verifier." + ] +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json new file mode 100644 index 0000000000000000000000000000000000000000..adec7032d7fae6ba4ca73ed347e0176c38aa961f --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/hf_status_snapshot.json @@ -0,0 +1,311 @@ +{ + "status": "running", + "started_at": 1777162756.623835, + "finished_at": null, + "commands": [ + { + "args": [ + "python", + "scripts/bootstrap_data.py" + ], + "returncode": 0, + "elapsed_seconds": 0.577 + }, + { + "args": [ + "python", + "scripts/build_training_corpus.py", + "--profile", + "massive", + "--with-local", + "--with-synthetic", + "--with-hf" + ], + "returncode": 0, + "elapsed_seconds": 3.86 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 257.387 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-0.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 4230.645 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 7.303 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 15.201 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-0.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 18.461 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 3.989 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 454.278 + }, + { + "args": [ + "python", + "scripts/train_grpo_trl.py", + "--model-id", + "Qwen/Qwen2.5-1.5B-Instruct", + "--prompts-path", + "data/processed/training_corpus_grpo_prompts.jsonl", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_trl_run.json", + "--max-prompts", + "0", + "--max-steps", + "0", + "--epochs", + "1.0", + "--batch-size", + "2", + "--grad-accum", + "1", + "--num-generations", + "2", + "--max-prompt-length", + "384", + "--max-completion-length", + "64", + "--learning-rate", + "1e-06", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 5118.654 + }, + { + "args": [ + "python", + "scripts/merge_adapters_safe.py", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" + ], + "returncode": 0, + "elapsed_seconds": 10.6 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" + ], + "returncode": 0, + "elapsed_seconds": 17.128 + }, + { + "args": [ + "python", + "scripts/test_inference_postsave.py", + "--samples", + "5", + "--base-model", + "Qwen/Qwen2.5-1.5B-Instruct", + "--merged-model", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/missing_merged_grpo", + "--adapter-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_adapter", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_grpo.json" + ], + "returncode": 0, + "elapsed_seconds": 21.528 + }, + { + "args": [ + "python", + "scripts/evaluate_policy_ablations.py", + "--episodes", + "8", + "--checkpoint-dir", + "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", + "--output", + "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/grpo_ablation_report.json" + ], + "returncode": 0, + "elapsed_seconds": 4.001 + }, + { + "args": [ + "python", + "scripts/train_sft_trl.py", + "--model-id", + "Qwen/Qwen2.5-3B-Instruct", + "--dataset-path", + "data/processed/training_corpus_sft.json", + "--output-dir", + "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", + "--report-path", + "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "--epochs", + "2", + "--max-steps", + "0", + "--batch-size", + "2", + "--max-seq-len", + "512", + "--learning-rate", + "2e-05", + "--use-unsloth" + ], + "returncode": 0, + "elapsed_seconds": 736.955 + } + ], + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "model_sweep": [ + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-3B-Instruct" + ], + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "log_tail": "\u2588\u2588\u2588\u2588\u2588\u258a| 1965/2000 [11:41<00:10, 3.22it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n \n{'loss': 0.0449, 'grad_norm': 0.8585970401763916, 'learning_rate': 3.7e-07, 'num_tokens': 1350951.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1966/2000 [11:42<00:11, 2.91it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n \n{'loss': 0.0518, 'grad_norm': 0.7478350400924683, 'learning_rate': 3.6e-07, 'num_tokens': 1351975.0, 'mean_token_accuracy': 0.9755381345748901, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1967/2000 [11:42<00:11, 2.85it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n \n{'loss': 0.0442, 'grad_norm': 0.8791924715042114, 'learning_rate': 3.5000000000000004e-07, 'num_tokens': 1352578.0, 'mean_token_accuracy': 0.9767054915428162, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1968/2000 [11:42<00:11, 2.69it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n \n{'loss': 0.0488, 'grad_norm': 0.6195839047431946, 'learning_rate': 3.4000000000000003e-07, 'num_tokens': 1353602.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1969/2000 [11:43<00:11, 2.70it/s]\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n \n{'loss': 0.0047, 'grad_norm': 0.8639671802520752, 'learning_rate': 3.3e-07, 'num_tokens': 1353784.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 98%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1970/2000 [11:43<00:09, 3.27it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n \n{'loss': 0.0048, 'grad_norm': 0.8560010194778442, 'learning_rate': 3.2e-07, 'num_tokens': 1353966.0, 'mean_token_accuracy': 1.0, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1971/2000 [11:43<00:07, 3.82it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n \n{'loss': 0.0382, 'grad_norm': 0.8542295694351196, 'learning_rate': 3.1000000000000005e-07, 'num_tokens': 1354990.0, 'mean_token_accuracy': 0.9823874831199646, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1972/2000 [11:43<00:08, 3.41it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n \n{'loss': 0.033, 'grad_norm': 0.7632898688316345, 'learning_rate': 3.0000000000000004e-07, 'num_tokens': 1355593.0, 'mean_token_accuracy': 0.9833610653877258, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1973/2000 [11:44<00:08, 3.02it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n \n{'loss': 0.0582, 'grad_norm': 0.7546073198318481, 'learning_rate': 2.9000000000000003e-07, 'num_tokens': 1356617.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.97}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258a| 1974/2000 [11:44<00:08, 2.92it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n \n{'loss': 0.0607, 'grad_norm': 0.9100231528282166, 'learning_rate': 2.8e-07, 'num_tokens': 1357641.0, 'mean_token_accuracy': 0.9706457853317261, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1975/2000 [11:44<00:08, 2.85it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n \n{'loss': 0.0522, 'grad_norm': 0.9831849932670593, 'learning_rate': 2.7e-07, 'num_tokens': 1358665.0, 'mean_token_accuracy': 0.9726027250289917, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1976/2000 [11:45<00:08, 2.81it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n \n{'loss': 0.0455, 'grad_norm': 0.7770227789878845, 'learning_rate': 2.6e-07, 'num_tokens': 1359268.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1977/2000 [11:45<00:08, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n \n{'loss': 0.043, 'grad_norm': 0.9285680055618286, 'learning_rate': 2.5000000000000004e-07, 'num_tokens': 1359871.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1978/2000 [11:46<00:08, 2.58it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n \n{'loss': 0.0475, 'grad_norm': 0.725820004940033, 'learning_rate': 2.4000000000000003e-07, 'num_tokens': 1360895.0, 'mean_token_accuracy': 0.9784736037254333, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1979/2000 [11:46<00:08, 2.62it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n \n{'loss': 0.0523, 'grad_norm': 0.9508711099624634, 'learning_rate': 2.3000000000000002e-07, 'num_tokens': 1361498.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1980/2000 [11:46<00:07, 2.54it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n \n{'loss': 0.0461, 'grad_norm': 0.9076665639877319, 'learning_rate': 2.2e-07, 'num_tokens': 1362101.0, 'mean_token_accuracy': 0.980033278465271, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1981/2000 [11:47<00:07, 2.49it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n \n{'loss': 0.0049, 'grad_norm': 0.8733372092247009, 'learning_rate': 2.1000000000000003e-07, 'num_tokens': 1362283.0, 'mean_token_accuracy': 1.0, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1982/2000 [11:47<00:05, 3.07it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n \n{'loss': 0.0499, 'grad_norm': 1.0219769477844238, 'learning_rate': 2.0000000000000002e-07, 'num_tokens': 1362886.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1983/2000 [11:47<00:06, 2.83it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n \n{'loss': 0.047, 'grad_norm': 0.6855125427246094, 'learning_rate': 1.9e-07, 'num_tokens': 1363910.0, 'mean_token_accuracy': 0.9794520735740662, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1984/2000 [11:48<00:05, 2.79it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n \n{'loss': 0.053, 'grad_norm': 0.9592626094818115, 'learning_rate': 1.8e-07, 'num_tokens': 1364513.0, 'mean_token_accuracy': 0.9717137813568115, 'epoch': 1.98}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1985/2000 [11:48<00:05, 2.66it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n \n{'loss': 0.0634, 'grad_norm': 0.9822715520858765, 'learning_rate': 1.7000000000000001e-07, 'num_tokens': 1365537.0, 'mean_token_accuracy': 0.9696673154830933, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1986/2000 [11:49<00:05, 2.67it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n \n{'loss': 0.005, 'grad_norm': 0.9051101207733154, 'learning_rate': 1.6e-07, 'num_tokens': 1365719.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1987/2000 [11:49<00:04, 3.24it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n \n{'loss': 0.057, 'grad_norm': 0.7732815742492676, 'learning_rate': 1.5000000000000002e-07, 'num_tokens': 1366743.0, 'mean_token_accuracy': 0.9716242551803589, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1988/2000 [11:49<00:03, 3.06it/s]\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n \n{'loss': 0.0488, 'grad_norm': 1.0130807161331177, 'learning_rate': 1.4e-07, 'num_tokens': 1367346.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 1.99}\n\n 99%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1989/2000 [11:50<00:03, 2.82it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n \n{'loss': 0.0502, 'grad_norm': 0.7733030319213867, 'learning_rate': 1.3e-07, 'num_tokens': 1368370.0, 'mean_token_accuracy': 0.976516604423523, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1990/2000 [11:50<00:03, 2.79it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n \n{'loss': 0.033, 'grad_norm': 0.8099549412727356, 'learning_rate': 1.2000000000000002e-07, 'num_tokens': 1368973.0, 'mean_token_accuracy': 0.981697142124176, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1991/2000 [11:50<00:03, 2.65it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n \n{'loss': 0.0505, 'grad_norm': 0.8513318300247192, 'learning_rate': 1.1e-07, 'num_tokens': 1369576.0, 'mean_token_accuracy': 0.9733777046203613, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1992/2000 [11:51<00:03, 2.57it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n \n{'loss': 0.0471, 'grad_norm': 0.8666603565216064, 'learning_rate': 1.0000000000000001e-07, 'num_tokens': 1370179.0, 'mean_token_accuracy': 0.9783693552017212, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1993/2000 [11:51<00:02, 2.51it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n \n{'loss': 0.0046, 'grad_norm': 0.8277124166488647, 'learning_rate': 9e-08, 'num_tokens': 1370361.0, 'mean_token_accuracy': 1.0, 'epoch': 1.99}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1994/2000 [11:51<00:01, 3.08it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n \n{'loss': 0.0491, 'grad_norm': 0.7712334990501404, 'learning_rate': 8e-08, 'num_tokens': 1370964.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1995/2000 [11:52<00:01, 2.83it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n \n{'loss': 0.037, 'grad_norm': 0.8775883316993713, 'learning_rate': 7e-08, 'num_tokens': 1371988.0, 'mean_token_accuracy': 0.980430543422699, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1996/2000 [11:52<00:01, 2.80it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n \n{'loss': 0.0377, 'grad_norm': 0.7055721282958984, 'learning_rate': 6.000000000000001e-08, 'num_tokens': 1373012.0, 'mean_token_accuracy': 0.9814090132713318, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1997/2000 [11:53<00:01, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n \n{'loss': 0.005, 'grad_norm': 0.8954693675041199, 'learning_rate': 5.0000000000000004e-08, 'num_tokens': 1373194.0, 'mean_token_accuracy': 1.0, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1998/2000 [11:53<00:00, 3.33it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n \n{'loss': 0.0314, 'grad_norm': 0.7444577217102051, 'learning_rate': 4e-08, 'num_tokens': 1373797.0, 'mean_token_accuracy': 0.9883527159690857, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2589| 1999/2000 [11:53<00:00, 2.98it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'loss': 0.0525, 'grad_norm': 1.007545828819275, 'learning_rate': 3.0000000000000004e-08, 'num_tokens': 1374400.0, 'mean_token_accuracy': 0.9750415682792664, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n \n{'train_runtime': 714.3473, 'train_samples_per_second': 5.6, 'train_steps_per_second': 2.8, 'train_loss': 0.1561080440459773, 'epoch': 2.0}\n\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.77it/s]\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 2000/2000 [11:54<00:00, 2.80it/s]\nsft_trl_done\n$ python scripts/train_grpo_trl.py --model-id Qwen/Qwen2.5-3B-Instruct --prompts-path data/processed/training_corpus_grpo_prompts.jsonl --output-dir checkpoints/sweeps/qwen-qwen2-5-3b-instruct --report-path outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json --max-prompts 0 --max-steps 0 --epochs 1.0 --batch-size 2 --grad-accum 1 --num-generations 2 --max-prompt-length 384 --max-completion-length 64 --learning-rate 1e-06 --use-unsloth\n" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..67d00756e92a5f7b983ca1856d58db24059c3fad --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/manifest.json @@ -0,0 +1,376 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..17f42d1ba8e5ed4aaf91fc331e9057d45b539b10 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/policy_ablation_report.json @@ -0,0 +1,150 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + }, + "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/grpo_ablation_report.json" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json new file mode 100644 index 0000000000000000000000000000000000000000..f3fe78e328e89d17c930dcf22e0d42cf569bdc56 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/remote_stage_records.json @@ -0,0 +1,101 @@ +[ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } +] diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..d10ae0a2d52e93bf7afd4fe5560708fd2cc8e794 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..f2d95c49345fee0c966ee899582d2fc611158764 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json new file mode 100644 index 0000000000000000000000000000000000000000..7353b93e049865d037f56de7e4e4e7ef22ecc017 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/availability.json @@ -0,0 +1,27 @@ +{ + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + } +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..40d6577dec4199160cb23eb63f3805b530c38dd7 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.762, + "avg_latency_seconds": 2.748, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 3.164, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but", + "candidate_id": "cand_05", + "selected_candidate": "cand_05", + "env_reward": 0.792, + "latency_seconds": 2.656, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.641, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.643, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.638, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5e7e4a3ead8458fcf1611ff54bdbc630d0bbdfea --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "model_index": 2, + "sft_epochs": 1, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..bd04c896532f5a5ae0fa8959979709a445323fb4 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 3.569, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2853, + "grad_norm": 1.139764428138733, + "learning_rate": 2e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 3.5581, + "grad_norm": NaN, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8917, + "grad_norm": 1.0447810888290405, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1935, + "grad_norm": 0.8309267163276672, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 3.5163, + "grad_norm": 4.351670742034912, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 3.4885, + "grad_norm": 4.261757850646973, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2711, + "grad_norm": 0.8578795790672302, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8313, + "grad_norm": 0.6491284370422363, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.2098, + "grad_norm": 0.8803694844245911, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 3.3912, + "grad_norm": 3.3331027030944824, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1925, + "grad_norm": 0.6839883327484131, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 3.3481, + "grad_norm": 2.9968008995056152, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8284, + "grad_norm": 0.5385816693305969, + "learning_rate": 1.989e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2033, + "grad_norm": 0.5642092823982239, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2305, + "grad_norm": 0.6205269694328308, + "learning_rate": 1.987e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.1978, + "grad_norm": 0.5339632630348206, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 3.2635, + "grad_norm": 2.3871994018554688, + "learning_rate": 1.985e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.1722, + "grad_norm": 0.5115076303482056, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.234, + "grad_norm": 0.7502650618553162, + "learning_rate": 1.983e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.2009, + "grad_norm": 0.563306450843811, + "learning_rate": 1.982e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 3.2024, + "grad_norm": 2.1435375213623047, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1136, + "grad_norm": 0.4755318760871887, + "learning_rate": 1.98e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.81, + "grad_norm": 0.42654362320899963, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 3.1658, + "grad_norm": 2.022304058074951, + "learning_rate": 1.978e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 3.1525, + "grad_norm": 1.9966037273406982, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.1701, + "grad_norm": 0.43180903792381287, + "learning_rate": 1.976e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1161, + "grad_norm": 0.49122628569602966, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 3.1096, + "grad_norm": 1.9505829811096191, + "learning_rate": 1.974e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.0957, + "grad_norm": 0.4052703380584717, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.1922, + "grad_norm": 0.4599268436431885, + "learning_rate": 1.972e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 3.0661, + "grad_norm": 1.9074920415878296, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 3.0517, + "grad_norm": 1.9043670892715454, + "learning_rate": 1.97e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8217, + "grad_norm": 0.43874070048332214, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.1533, + "grad_norm": 0.4097289741039276, + "learning_rate": 1.968e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 3.0079, + "grad_norm": 1.8589015007019043, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.9929, + "grad_norm": 1.8493101596832275, + "learning_rate": 1.966e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.9771, + "grad_norm": 1.823657751083374, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1322, + "grad_norm": 0.41579654812812805, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0436, + "grad_norm": 0.4191758632659912, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.7707, + "grad_norm": 0.389350026845932, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7557, + "grad_norm": 0.3683435320854187, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.9037, + "grad_norm": 1.7245700359344482, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.8901, + "grad_norm": 1.7086819410324097, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0387, + "grad_norm": 0.40467050671577454, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.0567, + "grad_norm": 0.4369414746761322, + "learning_rate": 1.957e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.1317, + "grad_norm": 0.4135839641094208, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0284, + "grad_norm": 0.3962143063545227, + "learning_rate": 1.955e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.8211, + "grad_norm": 1.6713019609451294, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.751, + "grad_norm": 0.3764272928237915, + "learning_rate": 1.953e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.1035, + "grad_norm": 0.4032706618309021, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.066, + "grad_norm": 0.3904367685317993, + "learning_rate": 1.951e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.7715, + "grad_norm": 1.6729886531829834, + "learning_rate": 1.95e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.7583, + "grad_norm": 1.668998122215271, + "learning_rate": 1.949e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.7429, + "grad_norm": 1.6743063926696777, + "learning_rate": 1.948e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1043, + "grad_norm": 0.41544175148010254, + "learning_rate": 1.947e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.0547, + "grad_norm": 0.4136095345020294, + "learning_rate": 1.946e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.7022, + "grad_norm": 1.6811003684997559, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.685, + "grad_norm": 1.6868253946304321, + "learning_rate": 1.944e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.6703, + "grad_norm": 1.6875874996185303, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.0897, + "grad_norm": 0.3931529223918915, + "learning_rate": 1.942e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0308, + "grad_norm": 0.4257798492908478, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.752, + "grad_norm": 0.3678564429283142, + "learning_rate": 1.94e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 0.995, + "grad_norm": 0.414833128452301, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0055, + "grad_norm": 0.42559435963630676, + "learning_rate": 1.938e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.5807, + "grad_norm": 1.7541372776031494, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.5636, + "grad_norm": 1.7794091701507568, + "learning_rate": 1.936e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.5482, + "grad_norm": 1.7919189929962158, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7033, + "grad_norm": 0.3789256811141968, + "learning_rate": 1.934e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7623, + "grad_norm": 0.41511237621307373, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.5008, + "grad_norm": 1.8457392454147339, + "learning_rate": 1.932e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 0.9835, + "grad_norm": 0.4251658618450165, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.6836, + "grad_norm": 0.39055028557777405, + "learning_rate": 1.93e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.0516, + "grad_norm": 0.4297751784324646, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 0.9707, + "grad_norm": 0.408170223236084, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.0632, + "grad_norm": 0.4372476041316986, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.419, + "grad_norm": 1.9062981605529785, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.4008, + "grad_norm": 1.9403553009033203, + "learning_rate": 1.925e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3866, + "grad_norm": 1.9395607709884644, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3668, + "grad_norm": 1.948604941368103, + "learning_rate": 1.923e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7165, + "grad_norm": 0.3970690369606018, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0087, + "grad_norm": 0.46349093317985535, + "learning_rate": 1.921e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.7138, + "grad_norm": 0.3978181481361389, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.6682, + "grad_norm": 0.38714009523391724, + "learning_rate": 1.919e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2852, + "grad_norm": 1.8964459896087646, + "learning_rate": 1.918e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2692, + "grad_norm": 1.8906216621398926, + "learning_rate": 1.917e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.644444465637207, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.253, + "grad_norm": 1.8771262168884277, + "learning_rate": 1.916e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9113, + "grad_norm": 0.49527081847190857, + "learning_rate": 1.915e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.0366, + "grad_norm": 0.4962358772754669, + "learning_rate": 1.914e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2018, + "grad_norm": 1.8590370416641235, + "learning_rate": 1.913e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 0.9951, + "grad_norm": 0.5745645761489868, + "learning_rate": 1.912e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.6545, + "grad_norm": 0.4285139739513397, + "learning_rate": 1.911e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1565, + "grad_norm": 1.8819890022277832, + "learning_rate": 1.91e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1391, + "grad_norm": 1.9009383916854858, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 0.9592, + "grad_norm": 0.5530417561531067, + "learning_rate": 1.908e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.639, + "grad_norm": 0.4635550081729889, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.0893, + "grad_norm": 1.9755080938339233, + "learning_rate": 1.906e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.0698, + "grad_norm": 2.017965793609619, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.0535, + "grad_norm": 2.0711710453033447, + "learning_rate": 1.904e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0313, + "grad_norm": 2.117086172103882, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6362, + "grad_norm": 0.48415306210517883, + "learning_rate": 1.902e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6335, + "grad_norm": 0.5150465965270996, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 0.9912, + "grad_norm": 0.6076453924179077, + "learning_rate": 1.9e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 0.9828, + "grad_norm": 0.5944868326187134, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.8844, + "grad_norm": 0.5450642704963684, + "learning_rate": 1.898e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9195, + "grad_norm": 0.5619152188301086, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.9053, + "grad_norm": 2.4565858840942383, + "learning_rate": 1.896e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.6608, + "grad_norm": 0.5228564739227295, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6786, + "grad_norm": 0.5397571325302124, + "learning_rate": 1.894e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6198, + "grad_norm": 0.537507176399231, + "learning_rate": 1.893e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.8448, + "grad_norm": 2.565553665161133, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 0.9505, + "grad_norm": 0.5609534978866577, + "learning_rate": 1.891e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6103, + "grad_norm": 0.5393182635307312, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.8089, + "grad_norm": 2.6849920749664307, + "learning_rate": 1.889e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 0.961, + "grad_norm": 0.5978713035583496, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.777, + "grad_norm": 2.7187552452087402, + "learning_rate": 1.887e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.7591, + "grad_norm": 2.7737131118774414, + "learning_rate": 1.886e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.74, + "grad_norm": 2.7507472038269043, + "learning_rate": 1.885e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6336, + "grad_norm": 0.6201249957084656, + "learning_rate": 1.884e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.5845, + "grad_norm": 0.5287116169929504, + "learning_rate": 1.883e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.8665, + "grad_norm": 0.6071702241897583, + "learning_rate": 1.882e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.8748, + "grad_norm": 0.6387258172035217, + "learning_rate": 1.881e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.875, + "grad_norm": 0.5957177877426147, + "learning_rate": 1.88e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.5784, + "grad_norm": 0.5134051442146301, + "learning_rate": 1.879e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.5775, + "grad_norm": 0.5122160911560059, + "learning_rate": 1.878e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.6118, + "grad_norm": 2.893503189086914, + "learning_rate": 1.877e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6218, + "grad_norm": 0.5278106927871704, + "learning_rate": 1.876e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.5808, + "grad_norm": 2.9607582092285156, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.802, + "grad_norm": 0.6248002052307129, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8202, + "grad_norm": 0.6419914364814758, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.8238747715950012, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.534, + "grad_norm": 3.0163865089416504, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.5157, + "grad_norm": 3.01271390914917, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.497, + "grad_norm": 2.959350824356079, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.4734, + "grad_norm": 2.8837082386016846, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8266, + "grad_norm": 0.6843762993812561, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.861, + "grad_norm": 0.7351704835891724, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.845, + "grad_norm": 0.7598766088485718, + "learning_rate": 1.866e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.3777, + "grad_norm": 3.036391496658325, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5412, + "grad_norm": 0.6829193830490112, + "learning_rate": 1.864e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.7666, + "grad_norm": 0.7895976901054382, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5381, + "grad_norm": 0.790127694606781, + "learning_rate": 1.862e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.2811, + "grad_norm": 3.4602015018463135, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.26, + "grad_norm": 3.52811336517334, + "learning_rate": 1.86e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.2314, + "grad_norm": 3.6009700298309326, + "learning_rate": 1.859e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.2002, + "grad_norm": 3.6722474098205566, + "learning_rate": 1.858e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.1693, + "grad_norm": 3.4836974143981934, + "learning_rate": 1.857e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.1338, + "grad_norm": 3.369781017303467, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.0973, + "grad_norm": 3.3117072582244873, + "learning_rate": 1.855e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.8315, + "grad_norm": 0.9976187944412231, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.0272, + "grad_norm": 3.300879955291748, + "learning_rate": 1.853e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 0.9891, + "grad_norm": 3.3772897720336914, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5464, + "grad_norm": 0.9478758573532104, + "learning_rate": 1.851e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8039, + "grad_norm": 1.1654984951019287, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 0.8961, + "grad_norm": 4.251962184906006, + "learning_rate": 1.849e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 0.8656, + "grad_norm": 4.492918491363525, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.493, + "grad_norm": 0.8727006912231445, + "learning_rate": 1.847e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.7707, + "grad_norm": 1.041538119316101, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5714, + "grad_norm": 0.9487267136573792, + "learning_rate": 1.845e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.4725, + "grad_norm": 0.798832356929779, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.7814, + "grad_norm": 0.9986205101013184, + "learning_rate": 1.843e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8258317112922668, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.7441, + "grad_norm": 0.9336599707603455, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 0.7031, + "grad_norm": 5.16276741027832, + "learning_rate": 1.841e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 0.679, + "grad_norm": 4.1701273918151855, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.7353, + "grad_norm": 1.0674586296081543, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.7491, + "grad_norm": 1.21304452419281, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.6185, + "grad_norm": 4.724250316619873, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.6687, + "grad_norm": 1.0483168363571167, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5248, + "grad_norm": 1.1386994123458862, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.692, + "grad_norm": 1.000663161277771, + "learning_rate": 1.834e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.549, + "grad_norm": 5.925390720367432, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.5316, + "grad_norm": 7.124028205871582, + "learning_rate": 1.832e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.6214, + "grad_norm": 1.0966285467147827, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.482, + "grad_norm": 4.625036239624023, + "learning_rate": 1.83e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.6731, + "grad_norm": 1.3060588836669922, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.5768, + "grad_norm": 1.7968002557754517, + "learning_rate": 1.828e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6029, + "grad_norm": 1.7848604917526245, + "learning_rate": 1.827e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.3979, + "grad_norm": 1.9516690969467163, + "learning_rate": 1.826e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.391, + "grad_norm": 3.8316330909729004, + "learning_rate": 1.825e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.6449, + "grad_norm": 1.5616425275802612, + "learning_rate": 1.824e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.6533, + "grad_norm": 1.280671238899231, + "learning_rate": 1.823e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.3584, + "grad_norm": 6.280538082122803, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.3733, + "grad_norm": 1.0696591138839722, + "learning_rate": 1.821e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.3357, + "grad_norm": 3.6380887031555176, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.9444444179534912, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.3244, + "grad_norm": 3.0167179107666016, + "learning_rate": 1.819e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.5994, + "grad_norm": 1.6260021924972534, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.6215, + "grad_norm": 1.607763409614563, + "learning_rate": 1.817e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.5443, + "grad_norm": 1.351562261581421, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.2865, + "grad_norm": 2.277933120727539, + "learning_rate": 1.815e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.5709, + "grad_norm": 1.3398513793945312, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.2716, + "grad_norm": 3.923830986022949, + "learning_rate": 1.813e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.509, + "grad_norm": 1.4502966403961182, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.4854, + "grad_norm": 1.4078965187072754, + "learning_rate": 1.811e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.2501, + "grad_norm": 3.077928304672241, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.5453, + "grad_norm": 1.7737340927124023, + "learning_rate": 1.809e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.239, + "grad_norm": 2.0369770526885986, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.2344, + "grad_norm": 1.9151840209960938, + "learning_rate": 1.807e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.5325, + "grad_norm": 1.6656997203826904, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.4971, + "grad_norm": 1.9251680374145508, + "learning_rate": 1.805e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.52, + "grad_norm": 1.8106904029846191, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.2154, + "grad_norm": 2.2629575729370117, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.4612, + "grad_norm": 1.7021019458770752, + "learning_rate": 1.802e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4315, + "grad_norm": 2.6399946212768555, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.4603, + "grad_norm": 1.909094214439392, + "learning_rate": 1.8e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.4483, + "grad_norm": 1.7435243129730225, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.4438, + "grad_norm": 2.1652462482452393, + "learning_rate": 1.798e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.4678, + "grad_norm": 2.338404417037964, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8962817788124084, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3195, + "grad_norm": 1.3209658861160278, + "learning_rate": 1.796e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.4409, + "grad_norm": 1.709653377532959, + "learning_rate": 1.795e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8982387185096741, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.4037, + "grad_norm": 2.7179744243621826, + "learning_rate": 1.794e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.2739, + "grad_norm": 1.0299943685531616, + "learning_rate": 1.793e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.2022, + "grad_norm": 2.607898473739624, + "learning_rate": 1.792e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.2042, + "grad_norm": 2.916175127029419, + "learning_rate": 1.791e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.3787, + "grad_norm": 2.026442527770996, + "learning_rate": 1.79e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.3879, + "grad_norm": 1.7650607824325562, + "learning_rate": 1.789e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.908023476600647, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.1951, + "grad_norm": 3.8692498207092285, + "learning_rate": 1.788e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.1904, + "grad_norm": 3.0922181606292725, + "learning_rate": 1.787e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.301, + "grad_norm": 1.9583574533462524, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.1827, + "grad_norm": 1.9792364835739136, + "learning_rate": 1.785e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.1794, + "grad_norm": 1.3933207988739014, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.3381, + "grad_norm": 1.6843299865722656, + "learning_rate": 1.783e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.1732, + "grad_norm": 1.4762918949127197, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.1689, + "grad_norm": 1.1075265407562256, + "learning_rate": 1.781e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.3562, + "grad_norm": 2.2154247760772705, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.1629, + "grad_norm": 1.3579362630844116, + "learning_rate": 1.779e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.3199, + "grad_norm": 1.9855793714523315, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.3381, + "grad_norm": 1.787819266319275, + "learning_rate": 1.777e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.1525, + "grad_norm": 1.0635879039764404, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.1496, + "grad_norm": 1.0544939041137695, + "learning_rate": 1.775e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.1459, + "grad_norm": 1.147072672843933, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.1426, + "grad_norm": 1.0801589488983154, + "learning_rate": 1.773e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.2557, + "grad_norm": 1.2963556051254272, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.1332, + "grad_norm": 1.3799799680709839, + "learning_rate": 1.771e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.2481, + "grad_norm": 1.1608214378356934, + "learning_rate": 1.77e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.2642, + "grad_norm": 1.2985522747039795, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.3124, + "grad_norm": 2.222142219543457, + "learning_rate": 1.768e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.3102, + "grad_norm": 2.533982753753662, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.1218, + "grad_norm": 1.7190382480621338, + "learning_rate": 1.766e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.1169, + "grad_norm": 1.3357374668121338, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.1147, + "grad_norm": 1.298270344734192, + "learning_rate": 1.764e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.3127, + "grad_norm": 2.2547061443328857, + "learning_rate": 1.763e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.2312, + "grad_norm": 1.7744327783584595, + "learning_rate": 1.762e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.3975, + "grad_norm": 4.527610778808594, + "learning_rate": 1.761e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.3551, + "grad_norm": 3.1718592643737793, + "learning_rate": 1.76e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.1045, + "grad_norm": 1.574190378189087, + "learning_rate": 1.759e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2236, + "grad_norm": 1.4468473196029663, + "learning_rate": 1.758e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.0999, + "grad_norm": 1.4842942953109741, + "learning_rate": 1.757e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.2509, + "grad_norm": 1.7860370874404907, + "learning_rate": 1.756e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.2611, + "grad_norm": 1.6783521175384521, + "learning_rate": 1.755e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.379, + "grad_norm": 2.3508005142211914, + "learning_rate": 1.754e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.0941, + "grad_norm": 2.0986952781677246, + "learning_rate": 1.753e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.0924, + "grad_norm": 1.9180539846420288, + "learning_rate": 1.752e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.0906, + "grad_norm": 1.0870189666748047, + "learning_rate": 1.751e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2357, + "grad_norm": 1.0672377347946167, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.2584, + "grad_norm": 2.204198122024536, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.0862, + "grad_norm": 2.385765552520752, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.2371, + "grad_norm": 1.8736376762390137, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.2442, + "grad_norm": 1.8243354558944702, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.0824, + "grad_norm": 1.8955978155136108, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3363, + "grad_norm": 2.798372507095337, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.0794, + "grad_norm": 1.304677128791809, + "learning_rate": 1.743e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.0773, + "grad_norm": 1.626665711402893, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.1939, + "grad_norm": 1.7440603971481323, + "learning_rate": 1.741e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.2501, + "grad_norm": 1.3810110092163086, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3304, + "grad_norm": 3.183516025543213, + "learning_rate": 1.739e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.2224, + "grad_norm": 2.094963550567627, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.2354, + "grad_norm": 1.3596550226211548, + "learning_rate": 1.737e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.0727, + "grad_norm": 1.5260241031646729, + "learning_rate": 1.736e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.222, + "grad_norm": 1.5992202758789062, + "learning_rate": 1.735e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3177, + "grad_norm": 2.2656893730163574, + "learning_rate": 1.734e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.0713, + "grad_norm": 1.7473493814468384, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2135, + "grad_norm": 1.9787451028823853, + "learning_rate": 1.732e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.1763, + "grad_norm": 1.0072226524353027, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.1957, + "grad_norm": 1.1664408445358276, + "learning_rate": 1.73e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3349, + "grad_norm": 2.7109858989715576, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.0711, + "grad_norm": 2.568545341491699, + "learning_rate": 1.728e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.1836, + "grad_norm": 1.850518822669983, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.0695, + "grad_norm": 2.5018086433410645, + "learning_rate": 1.726e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.1961, + "grad_norm": 0.9769375324249268, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.2135, + "grad_norm": 1.4824577569961548, + "learning_rate": 1.724e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.1623, + "grad_norm": 1.7970157861709595, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.2098, + "grad_norm": 1.702469825744629, + "learning_rate": 1.722e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.0642, + "grad_norm": 1.6492910385131836, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.1893, + "grad_norm": 1.3040688037872314, + "learning_rate": 1.72e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.0638, + "grad_norm": 2.035078287124634, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.0617, + "grad_norm": 1.428052306175232, + "learning_rate": 1.718e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.1591, + "grad_norm": 1.416749119758606, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.1787, + "grad_norm": 1.3673189878463745, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.324, + "grad_norm": 3.40804386138916, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.0582, + "grad_norm": 2.4875428676605225, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.1816, + "grad_norm": 1.6370735168457031, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.0556, + "grad_norm": 2.5525963306427, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.1861, + "grad_norm": 2.1719298362731934, + "learning_rate": 1.711e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.201, + "grad_norm": 1.304052472114563, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.1531, + "grad_norm": 1.5254027843475342, + "learning_rate": 1.709e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.2727, + "grad_norm": 2.922405242919922, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.1459, + "grad_norm": 1.7082411050796509, + "learning_rate": 1.707e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.174, + "grad_norm": 1.3555234670639038, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.1749, + "grad_norm": 0.9526453018188477, + "learning_rate": 1.705e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.1751, + "grad_norm": 1.491074800491333, + "learning_rate": 1.704e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3221, + "grad_norm": 3.0102553367614746, + "learning_rate": 1.703e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.1546, + "grad_norm": 2.2727670669555664, + "learning_rate": 1.702e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.1623, + "grad_norm": 1.1690260171890259, + "learning_rate": 1.701e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.1757, + "grad_norm": 1.3821128606796265, + "learning_rate": 1.7e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.1345, + "grad_norm": 1.1042118072509766, + "learning_rate": 1.699e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.1709, + "grad_norm": 1.283263087272644, + "learning_rate": 1.698e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.1741, + "grad_norm": 1.0933341979980469, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.1479, + "grad_norm": 1.3540836572647095, + "learning_rate": 1.696e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.094, + "grad_norm": 5.643751621246338, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.091, + "grad_norm": 5.622400760650635, + "learning_rate": 1.694e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.1534, + "grad_norm": 0.9459224343299866, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.0764, + "grad_norm": 4.563518047332764, + "learning_rate": 1.692e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.0689, + "grad_norm": 3.9746463298797607, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.1265, + "grad_norm": 1.5034980773925781, + "learning_rate": 1.69e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.055, + "grad_norm": 2.8813798427581787, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.0502, + "grad_norm": 2.0983633995056152, + "learning_rate": 1.688e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.1459, + "grad_norm": 2.4966609477996826, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.1373, + "grad_norm": 1.884824514389038, + "learning_rate": 1.686e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.12, + "grad_norm": 1.6215541362762451, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.0514, + "grad_norm": 3.570695400238037, + "learning_rate": 1.684e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.0503, + "grad_norm": 3.7310097217559814, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.1698, + "grad_norm": 1.3565757274627686, + "learning_rate": 1.682e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.144, + "grad_norm": 1.7988064289093018, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1553, + "grad_norm": 1.199349284172058, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2808, + "grad_norm": 2.2785050868988037, + "learning_rate": 1.679e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.1303, + "grad_norm": 1.4797053337097168, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.1437, + "grad_norm": 1.2159603834152222, + "learning_rate": 1.677e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.1094, + "grad_norm": 1.3378634452819824, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.1107, + "grad_norm": 1.3265125751495361, + "learning_rate": 1.675e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.104, + "grad_norm": 1.0398075580596924, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0508, + "grad_norm": 3.7928128242492676, + "learning_rate": 1.673e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.1141, + "grad_norm": 1.543946385383606, + "learning_rate": 1.672e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2347, + "grad_norm": 3.0478694438934326, + "learning_rate": 1.671e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1568, + "grad_norm": 1.438165307044983, + "learning_rate": 1.67e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0602, + "grad_norm": 4.521894454956055, + "learning_rate": 1.669e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0575, + "grad_norm": 4.285327434539795, + "learning_rate": 1.668e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.1228, + "grad_norm": 1.7977162599563599, + "learning_rate": 1.667e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0498, + "grad_norm": 3.2977139949798584, + "learning_rate": 1.666e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1072, + "grad_norm": 1.0961717367172241, + "learning_rate": 1.665e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.0888, + "grad_norm": 1.2719725370407104, + "learning_rate": 1.664e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.1016, + "grad_norm": 1.7138031721115112, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.0775, + "grad_norm": 1.2170872688293457, + "learning_rate": 1.662e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0415, + "grad_norm": 2.3039064407348633, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0407, + "grad_norm": 2.1441495418548584, + "learning_rate": 1.66e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 1.0, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0378, + "grad_norm": 1.570320725440979, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0358, + "grad_norm": 1.359679937362671, + "learning_rate": 1.658e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1491, + "grad_norm": 1.4656238555908203, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.093, + "grad_norm": 1.550439715385437, + "learning_rate": 1.656e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.1191, + "grad_norm": 1.6594032049179077, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1667, + "grad_norm": 1.6316683292388916, + "learning_rate": 1.654e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.1172, + "grad_norm": 1.1592111587524414, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0288, + "grad_norm": 1.2376233339309692, + "learning_rate": 1.652e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 1.0, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0279, + "grad_norm": 1.1726553440093994, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.158, + "grad_norm": 1.639247179031372, + "learning_rate": 1.65e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0254, + "grad_norm": 0.882344126701355, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0236, + "grad_norm": 0.7603262066841125, + "learning_rate": 1.648e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0231, + "grad_norm": 1.0259835720062256, + "learning_rate": 1.647e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1341, + "grad_norm": 1.3803941011428833, + "learning_rate": 1.646e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.26, + "grad_norm": 2.67657208442688, + "learning_rate": 1.645e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.0787, + "grad_norm": 1.1956502199172974, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0184, + "grad_norm": 1.0563417673110962, + "learning_rate": 1.643e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.2769, + "grad_norm": 3.5824198722839355, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.017, + "grad_norm": 0.9444816708564758, + "learning_rate": 1.641e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1499, + "grad_norm": 1.6610344648361206, + "learning_rate": 1.64e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0159, + "grad_norm": 1.3713178634643555, + "learning_rate": 1.639e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0142, + "grad_norm": 0.7958543300628662, + "learning_rate": 1.638e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0136, + "grad_norm": 0.7060168385505676, + "learning_rate": 1.637e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0126, + "grad_norm": 0.6885517239570618, + "learning_rate": 1.636e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.1437, + "grad_norm": 1.7837411165237427, + "learning_rate": 1.635e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.1352, + "grad_norm": 1.0794353485107422, + "learning_rate": 1.634e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1036, + "grad_norm": 1.2649973630905151, + "learning_rate": 1.633e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.082, + "grad_norm": 1.4123811721801758, + "learning_rate": 1.632e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2251, + "grad_norm": 2.3190250396728516, + "learning_rate": 1.631e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0101, + "grad_norm": 1.145607590675354, + "learning_rate": 1.63e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.01, + "grad_norm": 1.1430310010910034, + "learning_rate": 1.629e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1157, + "grad_norm": 1.080237865447998, + "learning_rate": 1.628e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0094, + "grad_norm": 0.8564168810844421, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.009, + "grad_norm": 0.6895986199378967, + "learning_rate": 1.626e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0088, + "grad_norm": 0.7237755656242371, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0081, + "grad_norm": 0.7111520767211914, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.2266, + "grad_norm": 3.2268872261047363, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.1096, + "grad_norm": 1.5681886672973633, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1323, + "grad_norm": 1.1309343576431274, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0065, + "grad_norm": 0.4017643630504608, + "learning_rate": 1.62e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.0901, + "grad_norm": 1.3869181871414185, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.135, + "grad_norm": 1.0720597505569458, + "learning_rate": 1.618e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2196, + "grad_norm": 2.46571683883667, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.1479, + "grad_norm": 1.4283263683319092, + "learning_rate": 1.616e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1442, + "grad_norm": 1.0318039655685425, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.119, + "grad_norm": 0.9293051958084106, + "learning_rate": 1.614e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0122, + "grad_norm": 2.9073522090911865, + "learning_rate": 1.613e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0156, + "grad_norm": 3.24949049949646, + "learning_rate": 1.612e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2428, + "grad_norm": 2.2780046463012695, + "learning_rate": 1.611e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0158, + "grad_norm": 2.8313698768615723, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.073, + "grad_norm": 1.1441925764083862, + "learning_rate": 1.609e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.0713, + "grad_norm": 1.0356674194335938, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1163, + "grad_norm": 0.9958234429359436, + "learning_rate": 1.607e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.065, + "grad_norm": 1.0690953731536865, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0143, + "grad_norm": 2.4794986248016357, + "learning_rate": 1.605e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1213, + "grad_norm": 1.1662561893463135, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0133, + "grad_norm": 2.1572377681732178, + "learning_rate": 1.603e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2415, + "grad_norm": 2.1097450256347656, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.2415, + "grad_norm": 1.9146851301193237, + "learning_rate": 1.601e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.0792, + "grad_norm": 1.4688655138015747, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1037, + "grad_norm": 1.3678481578826904, + "learning_rate": 1.599e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.0645, + "grad_norm": 1.394155740737915, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1221, + "grad_norm": 1.3450697660446167, + "learning_rate": 1.597e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0111, + "grad_norm": 1.5307925939559937, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.0111, + "grad_norm": 1.5876197814941406, + "learning_rate": 1.595e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.1193, + "grad_norm": 1.4841184616088867, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.1328, + "grad_norm": 1.1095598936080933, + "learning_rate": 1.593e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0093, + "grad_norm": 1.4608124494552612, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1107, + "grad_norm": 1.4897429943084717, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.1984, + "grad_norm": 2.675309419631958, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0076, + "grad_norm": 1.1623023748397827, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0063, + "grad_norm": 0.732515275478363, + "learning_rate": 1.588e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1286, + "grad_norm": 1.144338846206665, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.1896, + "grad_norm": 2.561152219772339, + "learning_rate": 1.586e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.1736, + "grad_norm": 2.7632133960723877, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0056, + "grad_norm": 0.5383828282356262, + "learning_rate": 1.584e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0053, + "grad_norm": 0.5213011503219604, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.1293, + "grad_norm": 1.3833296298980713, + "learning_rate": 1.582e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0047, + "grad_norm": 0.35407668352127075, + "learning_rate": 1.581e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1152, + "grad_norm": 1.2960784435272217, + "learning_rate": 1.58e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.0701, + "grad_norm": 1.1170578002929688, + "learning_rate": 1.579e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1111, + "grad_norm": 1.0579668283462524, + "learning_rate": 1.578e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0048, + "grad_norm": 0.4491373300552368, + "learning_rate": 1.577e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0052, + "grad_norm": 0.5798842906951904, + "learning_rate": 1.576e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0053, + "grad_norm": 0.6644476056098938, + "learning_rate": 1.575e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1002, + "grad_norm": 1.4146150350570679, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0049, + "grad_norm": 0.5174235701560974, + "learning_rate": 1.573e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1005, + "grad_norm": 1.295534610748291, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.0997, + "grad_norm": 1.874627947807312, + "learning_rate": 1.571e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0048, + "grad_norm": 0.477443128824234, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0048, + "grad_norm": 0.5091577172279358, + "learning_rate": 1.569e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0045, + "grad_norm": 0.42573752999305725, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1289, + "grad_norm": 1.2042423486709595, + "learning_rate": 1.567e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.0741, + "grad_norm": 1.1629348993301392, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.004, + "grad_norm": 0.3303038775920868, + "learning_rate": 1.565e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0039, + "grad_norm": 0.279052734375, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1122, + "grad_norm": 1.5259605646133423, + "learning_rate": 1.563e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1174, + "grad_norm": 1.2986260652542114, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.0041, + "grad_norm": 0.4193200170993805, + "learning_rate": 1.561e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1207, + "grad_norm": 1.2413984537124634, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0045, + "grad_norm": 0.6368035078048706, + "learning_rate": 1.559e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.101, + "grad_norm": 1.2425626516342163, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1124, + "grad_norm": 1.019707202911377, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0051, + "grad_norm": 0.8345929384231567, + "learning_rate": 1.556e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0052, + "grad_norm": 0.8587450385093689, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1214, + "grad_norm": 1.1086853742599487, + "learning_rate": 1.554e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1164, + "grad_norm": 1.238479495048523, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1249, + "grad_norm": 1.3684537410736084, + "learning_rate": 1.552e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0054, + "grad_norm": 0.947119951248169, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0056, + "grad_norm": 0.9146615266799927, + "learning_rate": 1.55e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.0782, + "grad_norm": 1.2344416379928589, + "learning_rate": 1.549e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.4506, + "grad_norm": 7.777007579803467, + "learning_rate": 1.548e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.0639, + "grad_norm": 1.501968264579773, + "learning_rate": 1.547e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0046, + "grad_norm": 0.6376725435256958, + "learning_rate": 1.546e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0043, + "grad_norm": 0.5955199003219604, + "learning_rate": 1.545e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1027, + "grad_norm": 1.514914631843567, + "learning_rate": 1.544e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1145, + "grad_norm": 1.1080951690673828, + "learning_rate": 1.543e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.1661, + "grad_norm": 2.103287696838379, + "learning_rate": 1.542e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0041, + "grad_norm": 0.5920866131782532, + "learning_rate": 1.541e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.0831, + "grad_norm": 1.2727563381195068, + "learning_rate": 1.54e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.076, + "grad_norm": 1.3624043464660645, + "learning_rate": 1.539e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0051, + "grad_norm": 1.0213030576705933, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0053, + "grad_norm": 1.1751487255096436, + "learning_rate": 1.537e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1073, + "grad_norm": 1.1450884342193604, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1152, + "grad_norm": 1.0188744068145752, + "learning_rate": 1.535e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0042, + "grad_norm": 0.6943671703338623, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.0041, + "grad_norm": 0.5702145099639893, + "learning_rate": 1.533e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1601, + "grad_norm": 2.467028856277466, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0036, + "grad_norm": 0.3947738707065582, + "learning_rate": 1.531e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0035, + "grad_norm": 0.3578404486179352, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1018, + "grad_norm": 1.5206029415130615, + "learning_rate": 1.529e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.0753, + "grad_norm": 1.400350570678711, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0032, + "grad_norm": 0.33458250761032104, + "learning_rate": 1.527e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0029, + "grad_norm": 0.2822412848472595, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0029, + "grad_norm": 0.24599352478981018, + "learning_rate": 1.525e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.0772, + "grad_norm": 1.2155442237854004, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0028, + "grad_norm": 0.2298114001750946, + "learning_rate": 1.523e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0027, + "grad_norm": 0.23676389455795288, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.0027, + "grad_norm": 0.21022361516952515, + "learning_rate": 1.521e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1104, + "grad_norm": 1.7568659782409668, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0027, + "grad_norm": 0.28411486744880676, + "learning_rate": 1.519e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0028, + "grad_norm": 0.2967180907726288, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0026, + "grad_norm": 0.31251031160354614, + "learning_rate": 1.517e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.0629, + "grad_norm": 1.4641610383987427, + "learning_rate": 1.516e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.0024, + "grad_norm": 0.22654157876968384, + "learning_rate": 1.515e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.063, + "grad_norm": 1.187050223350525, + "learning_rate": 1.514e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.0565, + "grad_norm": 1.331944227218628, + "learning_rate": 1.513e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0026, + "grad_norm": 0.37733522057533264, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.0989, + "grad_norm": 1.4206980466842651, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0028, + "grad_norm": 0.3664330244064331, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.003, + "grad_norm": 0.5825914740562439, + "learning_rate": 1.509e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.47541120648384094, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1152, + "grad_norm": 1.194077730178833, + "learning_rate": 1.507e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.0642, + "grad_norm": 1.5998581647872925, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0031, + "grad_norm": 0.45395979285240173, + "learning_rate": 1.505e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.066, + "grad_norm": 1.4924191236495972, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0642, + "grad_norm": 1.4406323432922363, + "learning_rate": 1.503e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.004, + "grad_norm": 0.7274853587150574, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0637, + "grad_norm": 1.4921272993087769, + "learning_rate": 1.501e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0881, + "grad_norm": 1.3289899826049805, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0046, + "grad_norm": 0.9299827814102173, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.0917, + "grad_norm": 1.0895007848739624, + "learning_rate": 1.498e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0055, + "grad_norm": 1.2428455352783203, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.0904, + "grad_norm": 1.1731876134872437, + "learning_rate": 1.496e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0042, + "grad_norm": 0.8642317652702332, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.0042, + "grad_norm": 0.9150028228759766, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1244, + "grad_norm": 1.520849585533142, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0667, + "grad_norm": 1.3897782564163208, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.4630263149738312, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0026, + "grad_norm": 0.32279714941978455, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1723, + "grad_norm": 2.5587806701660156, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.084, + "grad_norm": 1.5307081937789917, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0455, + "grad_norm": 1.2075250148773193, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.3137587904930115, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1133, + "grad_norm": 1.3542101383209229, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0025, + "grad_norm": 0.3963753581047058, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1022, + "grad_norm": 1.4186869859695435, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0029, + "grad_norm": 0.533608615398407, + "learning_rate": 1.482e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0842, + "grad_norm": 1.5056371688842773, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0033, + "grad_norm": 0.6577285528182983, + "learning_rate": 1.48e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1089, + "grad_norm": 1.4338765144348145, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1055, + "grad_norm": 1.13351571559906, + "learning_rate": 1.478e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.0951, + "grad_norm": 1.237243413925171, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.212, + "grad_norm": 3.4371607303619385, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.0058, + "grad_norm": 1.4969244003295898, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0068, + "grad_norm": 1.7211462259292603, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.0986, + "grad_norm": 0.948099672794342, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0057, + "grad_norm": 1.391058325767517, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0042, + "grad_norm": 0.9918210506439209, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.2042, + "grad_norm": 2.672642230987549, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.003, + "grad_norm": 0.45506858825683594, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0797, + "grad_norm": 1.4114668369293213, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0027, + "grad_norm": 0.5301483869552612, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0668, + "grad_norm": 1.3311203718185425, + "learning_rate": 1.466e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.0022, + "grad_norm": 0.2691483795642853, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.1992, + "grad_norm": 1.9987740516662598, + "learning_rate": 1.464e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1435, + "grad_norm": 2.9904839992523193, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.4652901887893677, + "learning_rate": 1.462e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0022, + "grad_norm": 0.30126360058784485, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0023, + "grad_norm": 0.28965601325035095, + "learning_rate": 1.46e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0022, + "grad_norm": 0.23019753396511078, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0022, + "grad_norm": 0.21258652210235596, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0748, + "grad_norm": 1.3212836980819702, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.002, + "grad_norm": 0.15865401923656464, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.002, + "grad_norm": 0.18746234476566315, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0684, + "grad_norm": 1.4932857751846313, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0021, + "grad_norm": 0.23370607197284698, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0765, + "grad_norm": 1.3977128267288208, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.0999, + "grad_norm": 1.421388030052185, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.41459253430366516, + "learning_rate": 1.45e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0026, + "grad_norm": 0.4490201473236084, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0918, + "grad_norm": 1.3046605587005615, + "learning_rate": 1.448e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0864, + "grad_norm": 1.233083963394165, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0032, + "grad_norm": 0.6014226078987122, + "learning_rate": 1.446e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1619, + "grad_norm": 2.670433759689331, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0034, + "grad_norm": 0.6123008131980896, + "learning_rate": 1.444e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1146, + "grad_norm": 1.6403765678405762, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1593, + "grad_norm": 2.7106077671051025, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0035, + "grad_norm": 0.693053126335144, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.06, + "grad_norm": 4.2686448097229, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.0764, + "grad_norm": 1.4215189218521118, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0037, + "grad_norm": 0.7100173234939575, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1991, + "grad_norm": 2.5193188190460205, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.0711, + "grad_norm": 1.3730517625808716, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.0891, + "grad_norm": 1.397972583770752, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0928, + "grad_norm": 1.5409183502197266, + "learning_rate": 1.434e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.0893, + "grad_norm": 1.1101114749908447, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0055, + "grad_norm": 1.2417343854904175, + "learning_rate": 1.432e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0829, + "grad_norm": 1.277969479560852, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.0892, + "grad_norm": 1.385054349899292, + "learning_rate": 1.43e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.0074, + "grad_norm": 1.8123408555984497, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0575, + "grad_norm": 1.3045315742492676, + "learning_rate": 1.428e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.1662, + "grad_norm": 2.5381715297698975, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.0067, + "grad_norm": 1.5872633457183838, + "learning_rate": 1.426e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0061, + "grad_norm": 1.5367522239685059, + "learning_rate": 1.425e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0052, + "grad_norm": 1.1771265268325806, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0035, + "grad_norm": 0.596717119216919, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0027, + "grad_norm": 0.3555561900138855, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0022, + "grad_norm": 0.31791797280311584, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.1456, + "grad_norm": 3.0790412425994873, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.0915, + "grad_norm": 1.610164761543274, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.0019, + "grad_norm": 0.35682275891304016, + "learning_rate": 1.418e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0758, + "grad_norm": 1.1877442598342896, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.0018, + "grad_norm": 0.3156123459339142, + "learning_rate": 1.416e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0017, + "grad_norm": 0.25764769315719604, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1041, + "grad_norm": 1.8042068481445312, + "learning_rate": 1.414e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1758, + "grad_norm": 2.5269131660461426, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0016, + "grad_norm": 0.12714117765426636, + "learning_rate": 1.412e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0016, + "grad_norm": 0.13591638207435608, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.0943, + "grad_norm": 1.4506866931915283, + "learning_rate": 1.41e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0017, + "grad_norm": 0.17016956210136414, + "learning_rate": 1.409e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0715, + "grad_norm": 1.1805306673049927, + "learning_rate": 1.408e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0831, + "grad_norm": 1.2475357055664062, + "learning_rate": 1.407e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.35699722170829773, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0721, + "grad_norm": 1.1971431970596313, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.066, + "grad_norm": 1.1251575946807861, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0027, + "grad_norm": 0.5506196618080139, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1048, + "grad_norm": 1.8220717906951904, + "learning_rate": 1.402e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.0037, + "grad_norm": 0.8545289039611816, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0037, + "grad_norm": 0.8475953936576843, + "learning_rate": 1.4e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.0967, + "grad_norm": 1.2703156471252441, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.098, + "grad_norm": 1.2548829317092896, + "learning_rate": 1.398e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.0924, + "grad_norm": 1.2570987939834595, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0609, + "grad_norm": 1.531058669090271, + "learning_rate": 1.396e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1424, + "grad_norm": 2.5060534477233887, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0048, + "grad_norm": 1.0655303001403809, + "learning_rate": 1.394e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0593, + "grad_norm": 1.0243408679962158, + "learning_rate": 1.393e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0905, + "grad_norm": 1.3182287216186523, + "learning_rate": 1.392e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0068, + "grad_norm": 1.4663218259811401, + "learning_rate": 1.391e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0057, + "grad_norm": 1.2375314235687256, + "learning_rate": 1.39e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0489, + "grad_norm": 1.071290135383606, + "learning_rate": 1.389e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0743, + "grad_norm": 1.0402666330337524, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.1041, + "grad_norm": 2.195901870727539, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0038, + "grad_norm": 0.7095027565956116, + "learning_rate": 1.386e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0804, + "grad_norm": 1.4653010368347168, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0038, + "grad_norm": 0.7164344191551208, + "learning_rate": 1.384e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.1019, + "grad_norm": 1.508054494857788, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0031, + "grad_norm": 0.4974660575389862, + "learning_rate": 1.382e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0031, + "grad_norm": 0.4921479821205139, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0614, + "grad_norm": 1.180677056312561, + "learning_rate": 1.38e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0843, + "grad_norm": 1.1165193319320679, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0816, + "grad_norm": 1.4082179069519043, + "learning_rate": 1.378e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.0893, + "grad_norm": 1.1407965421676636, + "learning_rate": 1.377e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0029, + "grad_norm": 0.47326186299324036, + "learning_rate": 1.376e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.48467254638671875, + "learning_rate": 1.375e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3466941714286804, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0028, + "grad_norm": 0.383543461561203, + "learning_rate": 1.373e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0027, + "grad_norm": 0.3878021240234375, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0699, + "grad_norm": 1.2407838106155396, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0956, + "grad_norm": 1.2576494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0022, + "grad_norm": 0.25685280561447144, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0022, + "grad_norm": 0.2545858323574066, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0023, + "grad_norm": 0.2819485366344452, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0858, + "grad_norm": 1.0897297859191895, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0021, + "grad_norm": 0.325777530670166, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0021, + "grad_norm": 0.29383793473243713, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0944, + "grad_norm": 1.389978289604187, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0962, + "grad_norm": 1.3364863395690918, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0019, + "grad_norm": 0.23381884396076202, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.058, + "grad_norm": 1.5767658948898315, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.002, + "grad_norm": 0.288552463054657, + "learning_rate": 1.359e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0894, + "grad_norm": 1.6633201837539673, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0829, + "grad_norm": 1.4220677614212036, + "learning_rate": 1.357e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0845, + "grad_norm": 1.3433754444122314, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.0917, + "grad_norm": 1.295201063156128, + "learning_rate": 1.355e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.0891, + "grad_norm": 1.3927174806594849, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.006, + "grad_norm": 1.4622353315353394, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0481, + "grad_norm": 1.178935170173645, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0075, + "grad_norm": 1.825118064880371, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.0065, + "grad_norm": 1.5563267469406128, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0059, + "grad_norm": 1.4133291244506836, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0753, + "grad_norm": 1.4185911417007446, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.087, + "grad_norm": 1.3738617897033691, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0702, + "grad_norm": 1.0876400470733643, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0031, + "grad_norm": 0.587776243686676, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.057, + "grad_norm": 1.4529519081115723, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0596, + "grad_norm": 1.0564322471618652, + "learning_rate": 1.343e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0795, + "grad_norm": 1.359084129333496, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0602, + "grad_norm": 1.625110387802124, + "learning_rate": 1.341e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.1519, + "grad_norm": 2.79744291305542, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1522, + "grad_norm": 2.5003347396850586, + "learning_rate": 1.339e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0591, + "grad_norm": 1.2735769748687744, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0603, + "grad_norm": 1.4963431358337402, + "learning_rate": 1.337e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.008, + "grad_norm": 1.6320358514785767, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0885, + "grad_norm": 1.660543441772461, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.059, + "grad_norm": 1.6638036966323853, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0092, + "grad_norm": 1.7701940536499023, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0717, + "grad_norm": 1.6387797594070435, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0795, + "grad_norm": 1.6651279926300049, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0811, + "grad_norm": 1.6673662662506104, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.1082, + "grad_norm": 2.1547534465789795, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0724, + "grad_norm": 1.5310810804367065, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1319, + "grad_norm": 3.544659376144409, + "learning_rate": 1.327e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0668, + "grad_norm": 1.4902386665344238, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0099, + "grad_norm": 1.8921332359313965, + "learning_rate": 1.325e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0093, + "grad_norm": 1.8240478038787842, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0727, + "grad_norm": 1.3348301649093628, + "learning_rate": 1.323e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.082, + "grad_norm": 1.235790491104126, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0743, + "grad_norm": 1.6094404458999634, + "learning_rate": 1.321e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0079, + "grad_norm": 1.5763838291168213, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0882, + "grad_norm": 1.602766513824463, + "learning_rate": 1.319e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0654, + "grad_norm": 1.5263670682907104, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0678, + "grad_norm": 1.2824158668518066, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1246, + "grad_norm": 2.722593307495117, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0428, + "grad_norm": 1.1944324970245361, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0643, + "grad_norm": 1.0645701885223389, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.0061, + "grad_norm": 1.2870023250579834, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0055, + "grad_norm": 1.1952035427093506, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0621, + "grad_norm": 1.063179850578308, + "learning_rate": 1.311e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0047, + "grad_norm": 0.9894086122512817, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0763, + "grad_norm": 1.4259341955184937, + "learning_rate": 1.309e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0642, + "grad_norm": 1.2943477630615234, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.079, + "grad_norm": 1.5152034759521484, + "learning_rate": 1.307e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0717, + "grad_norm": 1.1957803964614868, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.0599, + "grad_norm": 1.4417110681533813, + "learning_rate": 1.305e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0654, + "grad_norm": 1.5242059230804443, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0365, + "grad_norm": 1.1553280353546143, + "learning_rate": 1.303e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0045, + "grad_norm": 0.8679006695747375, + "learning_rate": 1.302e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0782, + "grad_norm": 1.3552151918411255, + "learning_rate": 1.301e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0777, + "grad_norm": 1.6802747249603271, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0895, + "grad_norm": 2.0004899501800537, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0065, + "grad_norm": 1.2331161499023438, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0926, + "grad_norm": 1.814571738243103, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0447, + "grad_norm": 1.2055951356887817, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.1061, + "grad_norm": 1.93771493434906, + "learning_rate": 1.295e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0071, + "grad_norm": 1.3096961975097656, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0809, + "grad_norm": 1.462066650390625, + "learning_rate": 1.293e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0696, + "grad_norm": 1.6013977527618408, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.0067, + "grad_norm": 1.247151494026184, + "learning_rate": 1.291e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0822, + "grad_norm": 1.3341907262802124, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1516, + "grad_norm": 2.655081033706665, + "learning_rate": 1.289e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0628, + "grad_norm": 1.1444809436798096, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.0731, + "grad_norm": 1.465855598449707, + "learning_rate": 1.287e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0057, + "grad_norm": 1.112541913986206, + "learning_rate": 1.286e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.1399, + "grad_norm": 3.088876485824585, + "learning_rate": 1.285e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0759, + "grad_norm": 1.2233434915542603, + "learning_rate": 1.284e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0557, + "grad_norm": 1.2852802276611328, + "learning_rate": 1.283e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.005, + "grad_norm": 1.0076061487197876, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0549, + "grad_norm": 1.230972409248352, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.004, + "grad_norm": 0.7870916724205017, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0034, + "grad_norm": 0.6174665093421936, + "learning_rate": 1.279e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.5346, + "grad_norm": 9.506900787353516, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0627, + "grad_norm": 1.454014539718628, + "learning_rate": 1.277e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3459113836288452, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0775, + "grad_norm": 1.3046914339065552, + "learning_rate": 1.275e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0528, + "grad_norm": 1.3675225973129272, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0629, + "grad_norm": 1.5410852432250977, + "learning_rate": 1.273e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0579, + "grad_norm": 1.2241291999816895, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0023, + "grad_norm": 0.32806485891342163, + "learning_rate": 1.271e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0024, + "grad_norm": 0.3713594675064087, + "learning_rate": 1.27e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0024, + "grad_norm": 0.383628249168396, + "learning_rate": 1.269e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0565, + "grad_norm": 1.4605262279510498, + "learning_rate": 1.268e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0907, + "grad_norm": 2.0260767936706543, + "learning_rate": 1.267e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1355, + "grad_norm": 2.7483110427856445, + "learning_rate": 1.266e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0028, + "grad_norm": 0.5287377834320068, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5259289145469666, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0029, + "grad_norm": 0.5197233557701111, + "learning_rate": 1.263e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0779, + "grad_norm": 1.9638550281524658, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0022, + "grad_norm": 0.34271013736724854, + "learning_rate": 1.261e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.31841135025024414, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.0021, + "grad_norm": 0.28541284799575806, + "learning_rate": 1.259e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.0765, + "grad_norm": 1.1577314138412476, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0018, + "grad_norm": 0.2100057303905487, + "learning_rate": 1.257e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.19263769686222076, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.0813, + "grad_norm": 1.540268898010254, + "learning_rate": 1.255e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0705, + "grad_norm": 1.2791322469711304, + "learning_rate": 1.254e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.3907, + "grad_norm": 7.0182013511657715, + "learning_rate": 1.253e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.19119806587696075, + "learning_rate": 1.252e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.18740034103393555, + "learning_rate": 1.251e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0797, + "grad_norm": 1.8779743909835815, + "learning_rate": 1.25e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0018, + "grad_norm": 0.1861187219619751, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.17008422315120697, + "learning_rate": 1.248e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0018, + "grad_norm": 0.2042454481124878, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.083, + "grad_norm": 1.2712551355361938, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0019, + "grad_norm": 0.22894388437271118, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0632, + "grad_norm": 1.2945611476898193, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0018, + "grad_norm": 0.21884307265281677, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0018, + "grad_norm": 0.22480158507823944, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0019, + "grad_norm": 0.24674543738365173, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0795, + "grad_norm": 2.106468677520752, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0018, + "grad_norm": 0.2204350233078003, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0737, + "grad_norm": 1.4242573976516724, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0878, + "grad_norm": 1.518812656402588, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0633, + "grad_norm": 1.0321228504180908, + "learning_rate": 1.236e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0756, + "grad_norm": 1.1949939727783203, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0024, + "grad_norm": 0.4306935966014862, + "learning_rate": 1.234e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0627, + "grad_norm": 1.1531753540039062, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.003, + "grad_norm": 0.6374348998069763, + "learning_rate": 1.232e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0036, + "grad_norm": 0.7683020234107971, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1434, + "grad_norm": 2.3946049213409424, + "learning_rate": 1.23e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.0032, + "grad_norm": 0.6773089170455933, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.003, + "grad_norm": 0.5508646368980408, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0517, + "grad_norm": 1.0663422346115112, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0598, + "grad_norm": 1.1945189237594604, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0024, + "grad_norm": 0.3890499174594879, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0023, + "grad_norm": 0.3637482821941376, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0022, + "grad_norm": 0.3558770716190338, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.0698, + "grad_norm": 1.282705545425415, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0753, + "grad_norm": 1.923362374305725, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.0769, + "grad_norm": 1.28227961063385, + "learning_rate": 1.22e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0019, + "grad_norm": 0.26410141587257385, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0802, + "grad_norm": 1.2387802600860596, + "learning_rate": 1.218e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.002, + "grad_norm": 0.3023037612438202, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0547, + "grad_norm": 1.3596991300582886, + "learning_rate": 1.216e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0725, + "grad_norm": 1.2279936075210571, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0603, + "grad_norm": 1.4540890455245972, + "learning_rate": 1.214e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0026, + "grad_norm": 0.48957788944244385, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0771, + "grad_norm": 1.2322392463684082, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0434, + "grad_norm": 1.224611759185791, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.7317530512809753, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0038, + "grad_norm": 0.7885755300521851, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0692, + "grad_norm": 1.2012921571731567, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.0036, + "grad_norm": 0.8018218874931335, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0451, + "grad_norm": 1.2235223054885864, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0474, + "grad_norm": 1.2205861806869507, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.0032, + "grad_norm": 0.7037767767906189, + "learning_rate": 1.204e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0518, + "grad_norm": 1.4091877937316895, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0691, + "grad_norm": 1.106124758720398, + "learning_rate": 1.202e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0034, + "grad_norm": 0.7851144075393677, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0032, + "grad_norm": 0.7951046824455261, + "learning_rate": 1.2e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0831, + "grad_norm": 1.5029832124710083, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0026, + "grad_norm": 0.5559270977973938, + "learning_rate": 1.198e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0022, + "grad_norm": 0.4153921902179718, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.0021, + "grad_norm": 0.37202781438827515, + "learning_rate": 1.196e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0529, + "grad_norm": 1.0388691425323486, + "learning_rate": 1.195e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0017, + "grad_norm": 0.22652830183506012, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0645, + "grad_norm": 1.505333423614502, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0865, + "grad_norm": 1.883539080619812, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0015, + "grad_norm": 0.16957923769950867, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0015, + "grad_norm": 0.19717897474765778, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0014, + "grad_norm": 0.1534471958875656, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0494, + "grad_norm": 1.1535961627960205, + "learning_rate": 1.188e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0014, + "grad_norm": 0.1624767929315567, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0015, + "grad_norm": 0.17362011969089508, + "learning_rate": 1.186e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0775, + "grad_norm": 1.9903476238250732, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1399, + "grad_norm": 3.302823781967163, + "learning_rate": 1.184e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0673, + "grad_norm": 1.326196312904358, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0015, + "grad_norm": 0.18564815819263458, + "learning_rate": 1.182e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0548, + "grad_norm": 1.438742756843567, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0017, + "grad_norm": 0.23712487518787384, + "learning_rate": 1.18e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0018, + "grad_norm": 0.27533257007598877, + "learning_rate": 1.179e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0018, + "grad_norm": 0.2764306366443634, + "learning_rate": 1.178e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0513, + "grad_norm": 1.2485377788543701, + "learning_rate": 1.177e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.143, + "grad_norm": 2.3260533809661865, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0865, + "grad_norm": 2.006594181060791, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0728, + "grad_norm": 1.229394793510437, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0727, + "grad_norm": 1.264754295349121, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0624, + "grad_norm": 1.1297813653945923, + "learning_rate": 1.172e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0657, + "grad_norm": 1.348644495010376, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.4017, + "grad_norm": 7.936118125915527, + "learning_rate": 1.17e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0509, + "grad_norm": 2.504011392593384, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0071, + "grad_norm": 1.4856328964233398, + "learning_rate": 1.168e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0065, + "grad_norm": 1.3074718713760376, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.0064, + "grad_norm": 1.328763484954834, + "learning_rate": 1.166e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 1.255282998085022, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1362, + "grad_norm": 1.9963600635528564, + "learning_rate": 1.164e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0042, + "grad_norm": 0.8505628108978271, + "learning_rate": 1.163e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0554, + "grad_norm": 1.5559666156768799, + "learning_rate": 1.162e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0029, + "grad_norm": 0.528516411781311, + "learning_rate": 1.161e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0025, + "grad_norm": 0.40555793046951294, + "learning_rate": 1.16e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0021, + "grad_norm": 0.3407900333404541, + "learning_rate": 1.159e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0726, + "grad_norm": 1.2919087409973145, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.4289, + "grad_norm": 6.98607063293457, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0511, + "grad_norm": 1.4350818395614624, + "learning_rate": 1.156e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0519, + "grad_norm": 1.400582194328308, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0017, + "grad_norm": 0.31648895144462585, + "learning_rate": 1.154e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0018, + "grad_norm": 0.3369519114494324, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0572, + "grad_norm": 1.1995043754577637, + "learning_rate": 1.152e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0742, + "grad_norm": 0.9991039633750916, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0501, + "grad_norm": 1.4309474229812622, + "learning_rate": 1.15e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.1276, + "grad_norm": 2.5142507553100586, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0023, + "grad_norm": 0.4930354058742523, + "learning_rate": 1.148e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.09, + "grad_norm": 1.8823350667953491, + "learning_rate": 1.147e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0517, + "grad_norm": 1.3514404296875, + "learning_rate": 1.146e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0023, + "grad_norm": 0.39818212389945984, + "learning_rate": 1.145e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.0026, + "grad_norm": 0.4840705394744873, + "learning_rate": 1.144e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0551, + "grad_norm": 0.9981673955917358, + "learning_rate": 1.143e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0025, + "grad_norm": 0.43263715505599976, + "learning_rate": 1.142e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1179, + "grad_norm": 2.982013463973999, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0691, + "grad_norm": 0.9637575745582581, + "learning_rate": 1.14e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0764, + "grad_norm": 1.1376231908798218, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0474, + "grad_norm": 0.9938456416130066, + "learning_rate": 1.138e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0036, + "grad_norm": 0.6827121376991272, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.069, + "grad_norm": 1.1721850633621216, + "learning_rate": 1.136e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0742, + "grad_norm": 1.3182216882705688, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0619, + "grad_norm": 1.405136227607727, + "learning_rate": 1.134e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0053, + "grad_norm": 1.0143218040466309, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0822, + "grad_norm": 1.4492801427841187, + "learning_rate": 1.132e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0518, + "grad_norm": 1.1326556205749512, + "learning_rate": 1.131e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0059, + "grad_norm": 1.0942848920822144, + "learning_rate": 1.13e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0486, + "grad_norm": 1.2563117742538452, + "learning_rate": 1.129e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.0994, + "grad_norm": 2.3433609008789062, + "learning_rate": 1.128e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1001, + "grad_norm": 2.7536284923553467, + "learning_rate": 1.127e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.0585, + "grad_norm": 0.9778537154197693, + "learning_rate": 1.126e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0062, + "grad_norm": 1.1226321458816528, + "learning_rate": 1.125e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0471, + "grad_norm": 1.1883548498153687, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0784, + "grad_norm": 1.976486086845398, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0701, + "grad_norm": 1.0843766927719116, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.067, + "grad_norm": 1.3081246614456177, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0062, + "grad_norm": 1.1432628631591797, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.0415, + "grad_norm": 0.9637823104858398, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0059, + "grad_norm": 1.120526909828186, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.005, + "grad_norm": 0.9103840589523315, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0696, + "grad_norm": 1.4037501811981201, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0466, + "grad_norm": 0.9911297559738159, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.0383, + "grad_norm": 0.9758827090263367, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0675, + "grad_norm": 1.3758506774902344, + "learning_rate": 1.113e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0032, + "grad_norm": 0.5923029780387878, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0032, + "grad_norm": 0.5734418630599976, + "learning_rate": 1.111e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0533, + "grad_norm": 1.0125759840011597, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0738, + "grad_norm": 1.2687044143676758, + "learning_rate": 1.109e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.372, + "grad_norm": 5.941206455230713, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.108, + "grad_norm": 2.1613714694976807, + "learning_rate": 1.107e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.0024, + "grad_norm": 0.39348432421684265, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.0639, + "grad_norm": 1.184023141860962, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0824, + "grad_norm": 1.9686490297317505, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0026, + "grad_norm": 0.44682711362838745, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0028, + "grad_norm": 0.49993517994880676, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0026, + "grad_norm": 0.4428325891494751, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0709, + "grad_norm": 1.2466169595718384, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0735, + "grad_norm": 1.3401033878326416, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0023, + "grad_norm": 0.3811323642730713, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0706, + "grad_norm": 1.4406594038009644, + "learning_rate": 1.097e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.054, + "grad_norm": 1.363612413406372, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0505, + "grad_norm": 1.161858320236206, + "learning_rate": 1.095e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.0022, + "grad_norm": 0.3702404797077179, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0023, + "grad_norm": 0.39905861020088196, + "learning_rate": 1.093e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0654, + "grad_norm": 1.083019733428955, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0613, + "grad_norm": 1.1142648458480835, + "learning_rate": 1.091e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0526, + "grad_norm": 1.24055016040802, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0687, + "grad_norm": 1.400773525238037, + "learning_rate": 1.089e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0462, + "grad_norm": 1.1053345203399658, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0574, + "grad_norm": 1.0202289819717407, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1215, + "grad_norm": 2.0495526790618896, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0603, + "grad_norm": 0.9297711253166199, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0073, + "grad_norm": 1.4618480205535889, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0491, + "grad_norm": 1.1468454599380493, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.07, + "grad_norm": 1.5984728336334229, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0097, + "grad_norm": 1.7861182689666748, + "learning_rate": 1.081e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0098, + "grad_norm": 1.7681940793991089, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0086, + "grad_norm": 1.6711666584014893, + "learning_rate": 1.079e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0431, + "grad_norm": 1.0142930746078491, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0417, + "grad_norm": 0.9444635510444641, + "learning_rate": 1.077e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0054, + "grad_norm": 1.0890287160873413, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0045, + "grad_norm": 0.9186440706253052, + "learning_rate": 1.075e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265022158622742, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0426, + "grad_norm": 1.0279744863510132, + "learning_rate": 1.073e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0693, + "grad_norm": 1.372605323791504, + "learning_rate": 1.072e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0017, + "grad_norm": 0.21290767192840576, + "learning_rate": 1.071e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0015, + "grad_norm": 0.17253448069095612, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.0526, + "grad_norm": 1.160703182220459, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0727, + "grad_norm": 1.2380679845809937, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1214, + "grad_norm": 2.0913727283477783, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0506, + "grad_norm": 1.0945791006088257, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.075, + "grad_norm": 1.382978916168213, + "learning_rate": 1.065e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0015, + "grad_norm": 0.172458216547966, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0742, + "grad_norm": 1.5439574718475342, + "learning_rate": 1.063e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0875, + "grad_norm": 1.514559030532837, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1175, + "grad_norm": 2.566283941268921, + "learning_rate": 1.061e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.22718015313148499, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0662, + "grad_norm": 1.2446449995040894, + "learning_rate": 1.059e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0023, + "grad_norm": 0.32198604941368103, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1204, + "grad_norm": 3.195101261138916, + "learning_rate": 1.057e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0647, + "grad_norm": 1.3185839653015137, + "learning_rate": 1.056e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0025, + "grad_norm": 0.3570478856563568, + "learning_rate": 1.055e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0692, + "grad_norm": 1.1017460823059082, + "learning_rate": 1.054e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0583, + "grad_norm": 1.167201042175293, + "learning_rate": 1.053e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1038, + "grad_norm": 2.155097723007202, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0038, + "grad_norm": 0.646456778049469, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0751, + "grad_norm": 1.3510818481445312, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1132, + "grad_norm": 2.1775286197662354, + "learning_rate": 1.049e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1073, + "grad_norm": 2.2072458267211914, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0048, + "grad_norm": 0.8271514177322388, + "learning_rate": 1.047e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0679, + "grad_norm": 1.0402039289474487, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0045, + "grad_norm": 0.7622825503349304, + "learning_rate": 1.045e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0538, + "grad_norm": 1.2865958213806152, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0656, + "grad_norm": 1.024865746498108, + "learning_rate": 1.043e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0039, + "grad_norm": 0.6565131545066833, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0043, + "grad_norm": 0.7380317449569702, + "learning_rate": 1.041e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0035, + "grad_norm": 0.570799708366394, + "learning_rate": 1.04e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.062, + "grad_norm": 1.1511563062667847, + "learning_rate": 1.039e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0695, + "grad_norm": 1.2906415462493896, + "learning_rate": 1.038e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0468, + "grad_norm": 1.2258033752441406, + "learning_rate": 1.037e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0024, + "grad_norm": 0.3688075542449951, + "learning_rate": 1.036e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0023, + "grad_norm": 0.3373582065105438, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.0709, + "grad_norm": 2.084989309310913, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.002, + "grad_norm": 0.27264249324798584, + "learning_rate": 1.033e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0018, + "grad_norm": 0.24489571154117584, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0799, + "grad_norm": 1.8190633058547974, + "learning_rate": 1.031e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0836, + "grad_norm": 1.4041454792022705, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1136, + "grad_norm": 2.274580240249634, + "learning_rate": 1.029e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0424, + "grad_norm": 1.3687119483947754, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0015, + "grad_norm": 0.16964252293109894, + "learning_rate": 1.027e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.0698, + "grad_norm": 1.1283705234527588, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0018, + "grad_norm": 0.22557133436203003, + "learning_rate": 1.025e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.21104346215724945, + "learning_rate": 1.024e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.0018, + "grad_norm": 0.24475614726543427, + "learning_rate": 1.023e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.0563, + "grad_norm": 2.955718755722046, + "learning_rate": 1.022e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0017, + "grad_norm": 0.24137888848781586, + "learning_rate": 1.021e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0017, + "grad_norm": 0.22060562670230865, + "learning_rate": 1.02e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0723, + "grad_norm": 1.5680960416793823, + "learning_rate": 1.019e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0016, + "grad_norm": 0.2214270681142807, + "learning_rate": 1.018e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.216565802693367, + "learning_rate": 1.017e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0684, + "grad_norm": 1.214136004447937, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1141, + "grad_norm": 2.0787954330444336, + "learning_rate": 1.015e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0015, + "grad_norm": 0.1908382773399353, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.0684, + "grad_norm": 0.9953256845474243, + "learning_rate": 1.013e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1151, + "grad_norm": 2.989778518676758, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0015, + "grad_norm": 0.1622181534767151, + "learning_rate": 1.011e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0015, + "grad_norm": 0.19451792538166046, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0015, + "grad_norm": 0.17583484947681427, + "learning_rate": 1.009e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.0971, + "grad_norm": 2.013803482055664, + "learning_rate": 1.008e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0015, + "grad_norm": 0.17960964143276215, + "learning_rate": 1.007e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0015, + "grad_norm": 0.18522843718528748, + "learning_rate": 1.006e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.071, + "grad_norm": 1.612250804901123, + "learning_rate": 1.005e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0789, + "grad_norm": 1.4309505224227905, + "learning_rate": 1.004e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0749, + "grad_norm": 1.3195449113845825, + "learning_rate": 1.003e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0685, + "grad_norm": 2.325835943222046, + "learning_rate": 1.002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0454, + "grad_norm": 1.1207916736602783, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.0018, + "grad_norm": 0.25914737582206726, + "learning_rate": 1e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0022, + "grad_norm": 0.35625582933425903, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.002, + "grad_norm": 0.3242781162261963, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3145410120487213, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0021, + "grad_norm": 0.33488088846206665, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0019, + "grad_norm": 0.2918454706668854, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0728, + "grad_norm": 1.2409576177597046, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.072, + "grad_norm": 1.2893600463867188, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.043, + "grad_norm": 1.1790004968643188, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0602, + "grad_norm": 1.1076241731643677, + "learning_rate": 9.91e-06, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0017, + "grad_norm": 0.2319565713405609, + "learning_rate": 9.9e-06, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0573, + "grad_norm": 2.263990879058838, + "learning_rate": 9.89e-06, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0018, + "grad_norm": 0.27414289116859436, + "learning_rate": 9.88e-06, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.068, + "grad_norm": 1.3204398155212402, + "learning_rate": 9.87e-06, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0021, + "grad_norm": 0.33790865540504456, + "learning_rate": 9.86e-06, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.002, + "grad_norm": 0.3250488340854645, + "learning_rate": 9.85e-06, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0614, + "grad_norm": 1.4563555717468262, + "learning_rate": 9.84e-06, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0499, + "grad_norm": 3.906182289123535, + "learning_rate": 9.83e-06, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1039, + "grad_norm": 2.9131107330322266, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1067, + "grad_norm": 3.119446039199829, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.0023, + "grad_norm": 0.3656690716743469, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0647, + "grad_norm": 1.234238862991333, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0612, + "grad_norm": 1.0838911533355713, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0668, + "grad_norm": 1.8563507795333862, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0027, + "grad_norm": 0.447256475687027, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0029, + "grad_norm": 0.4668635427951813, + "learning_rate": 9.75e-06, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0027, + "grad_norm": 0.45568251609802246, + "learning_rate": 9.74e-06, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0029, + "grad_norm": 0.5207828283309937, + "learning_rate": 9.73e-06, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0023, + "grad_norm": 0.3548046946525574, + "learning_rate": 9.72e-06, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.0022, + "grad_norm": 0.3339339792728424, + "learning_rate": 9.71e-06, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0449, + "grad_norm": 1.344630479812622, + "learning_rate": 9.7e-06, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0627, + "grad_norm": 1.3697110414505005, + "learning_rate": 9.69e-06, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0631, + "grad_norm": 1.4324746131896973, + "learning_rate": 9.68e-06, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0598, + "grad_norm": 1.1418583393096924, + "learning_rate": 9.67e-06, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0703, + "grad_norm": 1.3187053203582764, + "learning_rate": 9.66e-06, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0674, + "grad_norm": 1.5415701866149902, + "learning_rate": 9.65e-06, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0022, + "grad_norm": 0.5410366654396057, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0472, + "grad_norm": 1.4691059589385986, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0714, + "grad_norm": 1.8328925371170044, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0502, + "grad_norm": 1.4959746599197388, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770292103290558, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.0638, + "grad_norm": 1.2776446342468262, + "learning_rate": 9.59e-06, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0437, + "grad_norm": 1.0079017877578735, + "learning_rate": 9.58e-06, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0628, + "grad_norm": 1.1776297092437744, + "learning_rate": 9.57e-06, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0444, + "grad_norm": 1.2560832500457764, + "learning_rate": 9.56e-06, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0658, + "grad_norm": 1.9305787086486816, + "learning_rate": 9.55e-06, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0673, + "grad_norm": 1.5484907627105713, + "learning_rate": 9.54e-06, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0603, + "grad_norm": 1.2816107273101807, + "learning_rate": 9.53e-06, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0071, + "grad_norm": 1.2031859159469604, + "learning_rate": 9.52e-06, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0446, + "grad_norm": 1.0432018041610718, + "learning_rate": 9.51e-06, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0082, + "grad_norm": 1.3467326164245605, + "learning_rate": 9.5e-06, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.044, + "grad_norm": 1.1683317422866821, + "learning_rate": 9.49e-06, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.007, + "grad_norm": 1.1747612953186035, + "learning_rate": 9.48e-06, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0623, + "grad_norm": 1.1376299858093262, + "learning_rate": 9.47e-06, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0711, + "grad_norm": 1.2417066097259521, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0052, + "grad_norm": 0.9077128171920776, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0053, + "grad_norm": 0.951680600643158, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0467, + "grad_norm": 1.1328734159469604, + "learning_rate": 9.43e-06, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0036, + "grad_norm": 0.6388375163078308, + "learning_rate": 9.42e-06, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0713, + "grad_norm": 1.098759651184082, + "learning_rate": 9.41e-06, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0024, + "grad_norm": 0.3749485909938812, + "learning_rate": 9.4e-06, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.078, + "grad_norm": 1.4193601608276367, + "learning_rate": 9.39e-06, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0021, + "grad_norm": 0.29766610264778137, + "learning_rate": 9.38e-06, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0019, + "grad_norm": 0.2773911952972412, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0016, + "grad_norm": 0.19664674997329712, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0994, + "grad_norm": 2.1268746852874756, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.0476, + "grad_norm": 1.1297088861465454, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0518, + "grad_norm": 1.1052606105804443, + "learning_rate": 9.33e-06, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0483, + "grad_norm": 1.1215248107910156, + "learning_rate": 9.32e-06, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0595, + "grad_norm": 1.192276120185852, + "learning_rate": 9.31e-06, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1127, + "grad_norm": 2.282710552215576, + "learning_rate": 9.3e-06, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0014, + "grad_norm": 0.18352188169956207, + "learning_rate": 9.29e-06, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0668, + "grad_norm": 1.2716619968414307, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1147, + "grad_norm": 2.7008156776428223, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.1018, + "grad_norm": 2.031930446624756, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.002, + "grad_norm": 0.2863346338272095, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0471, + "grad_norm": 1.2682809829711914, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.002, + "grad_norm": 0.30941078066825867, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0024, + "grad_norm": 0.3932475745677948, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0632, + "grad_norm": 1.0679800510406494, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0663, + "grad_norm": 1.3005118370056152, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0423, + "grad_norm": 1.1240161657333374, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0029, + "grad_norm": 0.4581877887248993, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0029, + "grad_norm": 0.47186893224716187, + "learning_rate": 9.17e-06, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0032, + "grad_norm": 0.5238748788833618, + "learning_rate": 9.16e-06, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0028, + "grad_norm": 0.4411686062812805, + "learning_rate": 9.15e-06, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0026, + "grad_norm": 0.40239110589027405, + "learning_rate": 9.14e-06, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0023, + "grad_norm": 0.3315543234348297, + "learning_rate": 9.13e-06, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0021, + "grad_norm": 0.2885858416557312, + "learning_rate": 9.12e-06, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.073, + "grad_norm": 1.8177210092544556, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.0966, + "grad_norm": 1.7291756868362427, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0016, + "grad_norm": 0.19609428942203522, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0496, + "grad_norm": 1.1353715658187866, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0015, + "grad_norm": 0.17373698949813843, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.0441, + "grad_norm": 1.0672266483306885, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0014, + "grad_norm": 0.154168039560318, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0737, + "grad_norm": 1.3493475914001465, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.14875750243663788, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0012, + "grad_norm": 0.13037247955799103, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0012, + "grad_norm": 0.12503254413604736, + "learning_rate": 9.01e-06, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12820948660373688, + "learning_rate": 9e-06, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.0885, + "grad_norm": 1.8362265825271606, + "learning_rate": 8.99e-06, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.12838858366012573, + "learning_rate": 8.98e-06, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0495, + "grad_norm": 1.446435809135437, + "learning_rate": 8.97e-06, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.07, + "grad_norm": 1.1417546272277832, + "learning_rate": 8.96e-06, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0679, + "grad_norm": 1.1534578800201416, + "learning_rate": 8.95e-06, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.0556, + "grad_norm": 1.263162612915039, + "learning_rate": 8.94e-06, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0687, + "grad_norm": 1.441730260848999, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0561, + "grad_norm": 0.989497721195221, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0508, + "grad_norm": 1.1718560457229614, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0436, + "grad_norm": 1.1105691194534302, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0574, + "grad_norm": 1.159988522529602, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0028, + "grad_norm": 0.5130383968353271, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0703, + "grad_norm": 1.8314932584762573, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0389, + "grad_norm": 0.7763837575912476, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0648, + "grad_norm": 1.4212884902954102, + "learning_rate": 8.85e-06, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0467, + "grad_norm": 1.0347092151641846, + "learning_rate": 8.84e-06, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0574, + "grad_norm": 0.9852561950683594, + "learning_rate": 8.83e-06, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0443, + "grad_norm": 1.2871586084365845, + "learning_rate": 8.82e-06, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0497, + "grad_norm": 1.0900676250457764, + "learning_rate": 8.81e-06, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0094, + "grad_norm": 1.5167303085327148, + "learning_rate": 8.8e-06, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0091, + "grad_norm": 1.4984208345413208, + "learning_rate": 8.79e-06, + "num_tokens": 381077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0087, + "grad_norm": 1.4189144372940063, + "learning_rate": 8.78e-06, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0711, + "grad_norm": 1.5254539251327515, + "learning_rate": 8.77e-06, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0559, + "grad_norm": 0.9745803475379944, + "learning_rate": 8.76e-06, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0487, + "grad_norm": 0.9314166307449341, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.0985, + "grad_norm": 1.935889482498169, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.0884, + "grad_norm": 2.4487457275390625, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0417, + "grad_norm": 1.0779677629470825, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0071, + "grad_norm": 1.1962640285491943, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0412, + "grad_norm": 1.0417979955673218, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.0064, + "grad_norm": 1.0799331665039062, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0061, + "grad_norm": 1.0343092679977417, + "learning_rate": 8.68e-06, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0516, + "grad_norm": 1.2088981866836548, + "learning_rate": 8.67e-06, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0645, + "grad_norm": 1.4574052095413208, + "learning_rate": 8.66e-06, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0608, + "grad_norm": 1.5976455211639404, + "learning_rate": 8.65e-06, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0034, + "grad_norm": 0.562424898147583, + "learning_rate": 8.64e-06, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0031, + "grad_norm": 0.5184334516525269, + "learning_rate": 8.63e-06, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0538, + "grad_norm": 1.175452709197998, + "learning_rate": 8.62e-06, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0457, + "grad_norm": 1.0699386596679688, + "learning_rate": 8.61e-06, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0409, + "grad_norm": 1.2275623083114624, + "learning_rate": 8.6e-06, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0024, + "grad_norm": 0.36210763454437256, + "learning_rate": 8.59e-06, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0506, + "grad_norm": 1.1862293481826782, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0417, + "grad_norm": 1.0955649614334106, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0021, + "grad_norm": 0.3166447579860687, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0021, + "grad_norm": 0.3213079571723938, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.002, + "grad_norm": 0.29460856318473816, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0018, + "grad_norm": 0.2646322250366211, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.0962, + "grad_norm": 1.9064080715179443, + "learning_rate": 8.52e-06, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0018, + "grad_norm": 0.26078224182128906, + "learning_rate": 8.51e-06, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.22155798971652985, + "learning_rate": 8.5e-06, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0462, + "grad_norm": 1.282672643661499, + "learning_rate": 8.49e-06, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0615, + "grad_norm": 1.0272878408432007, + "learning_rate": 8.48e-06, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0391, + "grad_norm": 1.081066370010376, + "learning_rate": 8.47e-06, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0016, + "grad_norm": 0.2022254467010498, + "learning_rate": 8.46e-06, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0647, + "grad_norm": 1.203537106513977, + "learning_rate": 8.45e-06, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0411, + "grad_norm": 1.3823119401931763, + "learning_rate": 8.44e-06, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0017, + "grad_norm": 0.23678964376449585, + "learning_rate": 8.43e-06, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0498, + "grad_norm": 1.1035040616989136, + "learning_rate": 8.42e-06, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0019, + "grad_norm": 0.2826336622238159, + "learning_rate": 8.41e-06, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0018, + "grad_norm": 0.26219162344932556, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0369, + "grad_norm": 0.8924168944358826, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.2968710660934448, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0655, + "grad_norm": 1.4359571933746338, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0793, + "grad_norm": 1.4873827695846558, + "learning_rate": 8.36e-06, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0022, + "grad_norm": 0.3399635851383209, + "learning_rate": 8.35e-06, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0388, + "grad_norm": 1.2504096031188965, + "learning_rate": 8.34e-06, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0022, + "grad_norm": 0.34148266911506653, + "learning_rate": 8.33e-06, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0022, + "grad_norm": 0.33662110567092896, + "learning_rate": 8.32e-06, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.0022, + "grad_norm": 0.324468731880188, + "learning_rate": 8.31e-06, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.1031, + "grad_norm": 1.776872992515564, + "learning_rate": 8.3e-06, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0019, + "grad_norm": 0.27522948384284973, + "learning_rate": 8.29e-06, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0625, + "grad_norm": 1.0583921670913696, + "learning_rate": 8.28e-06, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.002, + "grad_norm": 0.2976676821708679, + "learning_rate": 8.27e-06, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0428, + "grad_norm": 1.0262646675109863, + "learning_rate": 8.26e-06, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.0569, + "grad_norm": 1.088004469871521, + "learning_rate": 8.25e-06, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0617, + "grad_norm": 1.422031044960022, + "learning_rate": 8.24e-06, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0705, + "grad_norm": 1.1122493743896484, + "learning_rate": 8.23e-06, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0023, + "grad_norm": 0.3706248998641968, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0548, + "grad_norm": 1.159569501876831, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0027, + "grad_norm": 0.44550517201423645, + "learning_rate": 8.2e-06, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0505, + "grad_norm": 1.0908255577087402, + "learning_rate": 8.19e-06, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0429, + "grad_norm": 0.9888002276420593, + "learning_rate": 8.18e-06, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.039, + "grad_norm": 1.1269707679748535, + "learning_rate": 8.17e-06, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0547, + "grad_norm": 2.2459864616394043, + "learning_rate": 8.16e-06, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0648, + "grad_norm": 1.141405463218689, + "learning_rate": 8.15e-06, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0036, + "grad_norm": 0.6154343485832214, + "learning_rate": 8.14e-06, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0037, + "grad_norm": 0.607581377029419, + "learning_rate": 8.13e-06, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.041, + "grad_norm": 1.0139696598052979, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0548, + "grad_norm": 1.2063956260681152, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0546, + "grad_norm": 1.0185149908065796, + "learning_rate": 8.1e-06, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0846, + "grad_norm": 1.5638638734817505, + "learning_rate": 8.09e-06, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0397, + "grad_norm": 0.9592515826225281, + "learning_rate": 8.08e-06, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0732, + "grad_norm": 2.417308807373047, + "learning_rate": 8.07e-06, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0397, + "grad_norm": 1.0397586822509766, + "learning_rate": 8.06e-06, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0539, + "grad_norm": 1.0043741464614868, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0064, + "grad_norm": 1.0331615209579468, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.3439, + "grad_norm": 7.151169776916504, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.3186, + "grad_norm": 6.194533348083496, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0373780727386475, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0693, + "grad_norm": 1.3804030418395996, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0063, + "grad_norm": 1.0356889963150024, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0063, + "grad_norm": 1.025659203529358, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1028, + "grad_norm": 2.4993162155151367, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0765, + "grad_norm": 1.528414011001587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0039, + "grad_norm": 0.6606444120407104, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.1021, + "grad_norm": 1.9298466444015503, + "learning_rate": 7.94e-06, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0658, + "grad_norm": 1.2403901815414429, + "learning_rate": 7.93e-06, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0901, + "grad_norm": 2.676560878753662, + "learning_rate": 7.92e-06, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0025, + "grad_norm": 0.3969874083995819, + "learning_rate": 7.91e-06, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0022, + "grad_norm": 0.3410389721393585, + "learning_rate": 7.9e-06, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.0467, + "grad_norm": 1.2688374519348145, + "learning_rate": 7.89e-06, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0906, + "grad_norm": 1.5839786529541016, + "learning_rate": 7.88e-06, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0808, + "grad_norm": 1.8329588174819946, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0678, + "grad_norm": 1.438069462776184, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0675, + "grad_norm": 1.4430946111679077, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0019, + "grad_norm": 0.29633986949920654, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0494, + "grad_norm": 1.1387202739715576, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0021, + "grad_norm": 0.32885608077049255, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.0862, + "grad_norm": 2.407383680343628, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0614, + "grad_norm": 1.1128315925598145, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0022, + "grad_norm": 0.3651196360588074, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0648, + "grad_norm": 1.3287708759307861, + "learning_rate": 7.78e-06, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.0023, + "grad_norm": 0.3838794231414795, + "learning_rate": 7.77e-06, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0684, + "grad_norm": 1.4677760601043701, + "learning_rate": 7.76e-06, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0024, + "grad_norm": 0.42079463601112366, + "learning_rate": 7.75e-06, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0024, + "grad_norm": 0.42147955298423767, + "learning_rate": 7.74e-06, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0441, + "grad_norm": 1.1677274703979492, + "learning_rate": 7.73e-06, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0492, + "grad_norm": 1.4035431146621704, + "learning_rate": 7.72e-06, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0671, + "grad_norm": 1.9446959495544434, + "learning_rate": 7.71e-06, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0025, + "grad_norm": 0.4543871581554413, + "learning_rate": 7.7e-06, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.042, + "grad_norm": 1.1771857738494873, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0679, + "grad_norm": 1.3713475465774536, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0026, + "grad_norm": 0.47350987792015076, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0633, + "grad_norm": 1.3524508476257324, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0637, + "grad_norm": 1.2763797044754028, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0902, + "grad_norm": 1.6739592552185059, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0031, + "grad_norm": 0.5534782409667969, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.0501, + "grad_norm": 1.3401867151260376, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.046, + "grad_norm": 1.1883294582366943, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0466, + "grad_norm": 1.101483941078186, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.071, + "grad_norm": 1.3334777355194092, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0558, + "grad_norm": 1.267762541770935, + "learning_rate": 7.58e-06, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0658, + "grad_norm": 1.4283661842346191, + "learning_rate": 7.57e-06, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0411, + "grad_norm": 0.9805395007133484, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0766, + "grad_norm": 1.4888850450515747, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0055, + "grad_norm": 0.9557706713676453, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0054, + "grad_norm": 0.9585487842559814, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0538, + "grad_norm": 1.1800369024276733, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0051, + "grad_norm": 0.8553330898284912, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0629, + "grad_norm": 1.230909824371338, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.068, + "grad_norm": 1.453507900238037, + "learning_rate": 7.49e-06, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0427, + "grad_norm": 0.9869980812072754, + "learning_rate": 7.48e-06, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1017, + "grad_norm": 2.1453680992126465, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0042, + "grad_norm": 0.7140144109725952, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.0616, + "grad_norm": 1.021086573600769, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0434, + "grad_norm": 1.1894596815109253, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0862, + "grad_norm": 2.159723997116089, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.0429, + "grad_norm": 1.066892147064209, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0572, + "grad_norm": 1.0095235109329224, + "learning_rate": 7.41e-06, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.054, + "grad_norm": 1.2086626291275024, + "learning_rate": 7.4e-06, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0046, + "grad_norm": 0.7741432189941406, + "learning_rate": 7.39e-06, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0047, + "grad_norm": 0.7828612923622131, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0045, + "grad_norm": 0.7598645687103271, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0046, + "grad_norm": 0.7734522819519043, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.057, + "grad_norm": 1.0973255634307861, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.065, + "grad_norm": 1.709967017173767, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0931, + "grad_norm": 2.1337525844573975, + "learning_rate": 7.33e-06, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0028, + "grad_norm": 0.4441553056240082, + "learning_rate": 7.32e-06, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0805, + "grad_norm": 3.2075629234313965, + "learning_rate": 7.31e-06, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0026, + "grad_norm": 0.4167421758174896, + "learning_rate": 7.3e-06, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0023, + "grad_norm": 0.35469523072242737, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0021, + "grad_norm": 0.31768423318862915, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0441, + "grad_norm": 0.9787921905517578, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0019, + "grad_norm": 0.2729261517524719, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0016, + "grad_norm": 0.21043084561824799, + "learning_rate": 7.25e-06, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0015, + "grad_norm": 0.1971331685781479, + "learning_rate": 7.24e-06, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0802, + "grad_norm": 1.84896719455719, + "learning_rate": 7.23e-06, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.0687, + "grad_norm": 1.369922399520874, + "learning_rate": 7.22e-06, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0014, + "grad_norm": 0.16199085116386414, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0013, + "grad_norm": 0.14561891555786133, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0762, + "grad_norm": 2.150111436843872, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.0011, + "grad_norm": 0.12219979614019394, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0409, + "grad_norm": 1.0275540351867676, + "learning_rate": 7.17e-06, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0622, + "grad_norm": 1.3782963752746582, + "learning_rate": 7.16e-06, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0942, + "grad_norm": 2.0990819931030273, + "learning_rate": 7.15e-06, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0556, + "grad_norm": 1.1607019901275635, + "learning_rate": 7.14e-06, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0012, + "grad_norm": 0.14383459091186523, + "learning_rate": 7.13e-06, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0443, + "grad_norm": 1.0032017230987549, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0014, + "grad_norm": 0.18446141481399536, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0014, + "grad_norm": 0.19693079590797424, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.0486, + "grad_norm": 1.2597516775131226, + "learning_rate": 7.09e-06, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0014, + "grad_norm": 0.1964249163866043, + "learning_rate": 7.08e-06, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0015, + "grad_norm": 0.21462222933769226, + "learning_rate": 7.07e-06, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0508, + "grad_norm": 1.3977996110916138, + "learning_rate": 7.06e-06, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0828, + "grad_norm": 1.5659841299057007, + "learning_rate": 7.05e-06, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0603, + "grad_norm": 1.602921724319458, + "learning_rate": 7.04e-06, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0744, + "grad_norm": 2.2317163944244385, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0561, + "grad_norm": 2.125541925430298, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.3173121213912964, + "learning_rate": 7.01e-06, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0459, + "grad_norm": 1.2071703672409058, + "learning_rate": 7e-06, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0432, + "grad_norm": 1.2934582233428955, + "learning_rate": 6.99e-06, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0489, + "grad_norm": 1.1334161758422852, + "learning_rate": 6.98e-06, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0579, + "grad_norm": 0.9369598627090454, + "learning_rate": 6.97e-06, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0033, + "grad_norm": 0.5776845812797546, + "learning_rate": 6.96e-06, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0569, + "grad_norm": 1.3031799793243408, + "learning_rate": 6.95e-06, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0037, + "grad_norm": 0.6248667240142822, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0032, + "grad_norm": 0.5299662947654724, + "learning_rate": 6.93e-06, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0667, + "grad_norm": 1.8433657884597778, + "learning_rate": 6.92e-06, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0577, + "grad_norm": 1.1226876974105835, + "learning_rate": 6.91e-06, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.0567, + "grad_norm": 1.1603243350982666, + "learning_rate": 6.9e-06, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0032, + "grad_norm": 0.5435492992401123, + "learning_rate": 6.89e-06, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0606, + "grad_norm": 0.9929336905479431, + "learning_rate": 6.88e-06, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0036, + "grad_norm": 0.6169335842132568, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0649, + "grad_norm": 1.2230188846588135, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0613, + "grad_norm": 1.0680222511291504, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.0455, + "grad_norm": 1.529793620109558, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0036, + "grad_norm": 0.614677906036377, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.074, + "grad_norm": 2.1550259590148926, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0541, + "grad_norm": 0.9593685269355774, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.0036, + "grad_norm": 0.5768935084342957, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0465, + "grad_norm": 1.2158730030059814, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0438, + "grad_norm": 1.1586334705352783, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0444, + "grad_norm": 1.4859849214553833, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0403, + "grad_norm": 1.1270227432250977, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.004, + "grad_norm": 0.6430424451828003, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.0906, + "grad_norm": 1.5925347805023193, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0422, + "grad_norm": 0.9977685213088989, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0564, + "grad_norm": 1.1696628332138062, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0518, + "grad_norm": 0.9724094271659851, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0047, + "grad_norm": 0.7779951095581055, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0043, + "grad_norm": 0.7115391492843628, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.3534, + "grad_norm": 6.629246234893799, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0038, + "grad_norm": 0.6219172477722168, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0041, + "grad_norm": 0.6817074418067932, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0601, + "grad_norm": 1.2284682989120483, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0585, + "grad_norm": 1.3272614479064941, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0417, + "grad_norm": 0.929707944393158, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.0768, + "grad_norm": 1.2148957252502441, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.003, + "grad_norm": 0.4916832149028778, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0659, + "grad_norm": 1.1595323085784912, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0669, + "grad_norm": 1.3607900142669678, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0843, + "grad_norm": 2.730896472930908, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0587, + "grad_norm": 1.2983198165893555, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.0675, + "grad_norm": 1.475829839706421, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0034, + "grad_norm": 0.569835364818573, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0031, + "grad_norm": 0.5171738862991333, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0032, + "grad_norm": 0.5472842454910278, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0029, + "grad_norm": 0.4868464767932892, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0616, + "grad_norm": 1.1753767728805542, + "learning_rate": 6.51e-06, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.05, + "grad_norm": 1.306359052658081, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.0027, + "grad_norm": 0.4471572935581207, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0535, + "grad_norm": 1.1857725381851196, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0023, + "grad_norm": 0.39148810505867004, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0021, + "grad_norm": 0.3375743329524994, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0601, + "grad_norm": 3.349716901779175, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.077, + "grad_norm": 1.3602453470230103, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0482, + "grad_norm": 1.1098014116287231, + "learning_rate": 6.43e-06, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0019, + "grad_norm": 0.3053341507911682, + "learning_rate": 6.42e-06, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0019, + "grad_norm": 0.3125056326389313, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0019, + "grad_norm": 0.28826457262039185, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0652, + "grad_norm": 1.4113070964813232, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0467, + "grad_norm": 1.2754263877868652, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0017, + "grad_norm": 0.2621810734272003, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0658, + "grad_norm": 1.0557119846343994, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0567, + "grad_norm": 1.4838411808013916, + "learning_rate": 6.35e-06, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.26117855310440063, + "learning_rate": 6.34e-06, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.1064739227294922, + "learning_rate": 6.33e-06, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0435, + "grad_norm": 1.063262939453125, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.066, + "grad_norm": 1.1504032611846924, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0641, + "grad_norm": 1.203201174736023, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0585, + "grad_norm": 1.2477880716323853, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0025, + "grad_norm": 0.4655078947544098, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0602, + "grad_norm": 1.341115951538086, + "learning_rate": 6.27e-06, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0904, + "grad_norm": 2.366762399673462, + "learning_rate": 6.26e-06, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0033, + "grad_norm": 0.6076349020004272, + "learning_rate": 6.25e-06, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0907, + "grad_norm": 1.9339498281478882, + "learning_rate": 6.24e-06, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0864, + "grad_norm": 1.780813217163086, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0033, + "grad_norm": 0.6028679609298706, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0542, + "grad_norm": 1.0088207721710205, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0758, + "grad_norm": 1.5442019701004028, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0034, + "grad_norm": 0.6019788980484009, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.277, + "grad_norm": 5.171119689941406, + "learning_rate": 6.18e-06, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0036, + "grad_norm": 0.6451438665390015, + "learning_rate": 6.17e-06, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0037, + "grad_norm": 0.6643303036689758, + "learning_rate": 6.16e-06, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0034, + "grad_norm": 0.6205865740776062, + "learning_rate": 6.15e-06, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0029, + "grad_norm": 0.4953503906726837, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.0027, + "grad_norm": 0.46802619099617004, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0908, + "grad_norm": 1.535525918006897, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0417, + "grad_norm": 0.9248743653297424, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.002, + "grad_norm": 0.3165223300457001, + "learning_rate": 6.1e-06, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0542, + "grad_norm": 0.9654661417007446, + "learning_rate": 6.09e-06, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0692, + "grad_norm": 1.3097866773605347, + "learning_rate": 6.08e-06, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0701, + "grad_norm": 1.50612473487854, + "learning_rate": 6.07e-06, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0017, + "grad_norm": 0.2454281896352768, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0855, + "grad_norm": 1.9738035202026367, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0017, + "grad_norm": 0.2594867944717407, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0579, + "grad_norm": 1.1067945957183838, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0566, + "grad_norm": 1.0555428266525269, + "learning_rate": 6.02e-06, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0016, + "grad_norm": 0.24508465826511383, + "learning_rate": 6.01e-06, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0632, + "grad_norm": 1.3900046348571777, + "learning_rate": 6e-06, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0404, + "grad_norm": 0.9500136971473694, + "learning_rate": 5.99e-06, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0573, + "grad_norm": 1.2340861558914185, + "learning_rate": 5.98e-06, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.04, + "grad_norm": 1.035536527633667, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.064, + "grad_norm": 0.9856736660003662, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.2168488502502441, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.0819, + "grad_norm": 1.6233789920806885, + "learning_rate": 5.94e-06, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0644, + "grad_norm": 1.539711594581604, + "learning_rate": 5.93e-06, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0031, + "grad_norm": 0.5361098647117615, + "learning_rate": 5.92e-06, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0657, + "grad_norm": 1.5077885389328003, + "learning_rate": 5.91e-06, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0033, + "grad_norm": 0.5819950699806213, + "learning_rate": 5.9e-06, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0844, + "grad_norm": 1.6911466121673584, + "learning_rate": 5.89e-06, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 0.909106969833374, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0046, + "grad_norm": 0.8148921132087708, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0603, + "grad_norm": 1.50859797000885, + "learning_rate": 5.86e-06, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0041, + "grad_norm": 0.7295659780502319, + "learning_rate": 5.85e-06, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.0532, + "grad_norm": 1.1242952346801758, + "learning_rate": 5.84e-06, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0544, + "grad_norm": 0.9595649838447571, + "learning_rate": 5.83e-06, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0042, + "grad_norm": 0.7197695374488831, + "learning_rate": 5.82e-06, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0637, + "grad_norm": 1.327078938484192, + "learning_rate": 5.81e-06, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0515, + "grad_norm": 1.3836802244186401, + "learning_rate": 5.8e-06, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0471, + "grad_norm": 2.055051326751709, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0634, + "grad_norm": 1.3304088115692139, + "learning_rate": 5.78e-06, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0042, + "grad_norm": 0.7247684597969055, + "learning_rate": 5.77e-06, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0042, + "grad_norm": 0.7230411767959595, + "learning_rate": 5.76e-06, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0802, + "grad_norm": 1.942260980606079, + "learning_rate": 5.75e-06, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0408, + "grad_norm": 0.9843087792396545, + "learning_rate": 5.74e-06, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0037, + "grad_norm": 0.6149731278419495, + "learning_rate": 5.73e-06, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0035, + "grad_norm": 0.591227114200592, + "learning_rate": 5.72e-06, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0034, + "grad_norm": 0.5716548562049866, + "learning_rate": 5.71e-06, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0028, + "grad_norm": 0.4706770181655884, + "learning_rate": 5.7e-06, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0023, + "grad_norm": 0.37091749906539917, + "learning_rate": 5.69e-06, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0592, + "grad_norm": 1.1389172077178955, + "learning_rate": 5.68e-06, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0021, + "grad_norm": 0.33143892884254456, + "learning_rate": 5.67e-06, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.068, + "grad_norm": 2.0014731884002686, + "learning_rate": 5.66e-06, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0433, + "grad_norm": 1.1497068405151367, + "learning_rate": 5.65e-06, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0017, + "grad_norm": 0.2540724575519562, + "learning_rate": 5.64e-06, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0403, + "grad_norm": 1.0868761539459229, + "learning_rate": 5.63e-06, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0015, + "grad_norm": 0.19899524748325348, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0455, + "grad_norm": 1.617480754852295, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0014, + "grad_norm": 0.19665531814098358, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0648, + "grad_norm": 1.622554898262024, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0014, + "grad_norm": 0.18810254335403442, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0701, + "grad_norm": 1.4964152574539185, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0013, + "grad_norm": 0.15776444971561432, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0012, + "grad_norm": 0.1539117842912674, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0013, + "grad_norm": 0.1636369377374649, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0012, + "grad_norm": 0.15004193782806396, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0012, + "grad_norm": 0.15097948908805847, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0012, + "grad_norm": 0.14485493302345276, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.047, + "grad_norm": 1.3281570672988892, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0519, + "grad_norm": 2.394688844680786, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.0012, + "grad_norm": 0.1376945525407791, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.13309122622013092, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0439, + "grad_norm": 1.0667738914489746, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.0012, + "grad_norm": 0.14376237988471985, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.13507920503616333, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0749, + "grad_norm": 1.5052191019058228, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0012, + "grad_norm": 0.14203152060508728, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0445, + "grad_norm": 1.228667974472046, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.0656, + "grad_norm": 1.407843828201294, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0647, + "grad_norm": 1.6894930601119995, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.14642253518104553, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0452, + "grad_norm": 1.07169508934021, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0013, + "grad_norm": 0.1761048138141632, + "learning_rate": 5.36e-06, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0849, + "grad_norm": 2.0752289295196533, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0425, + "grad_norm": 1.113696575164795, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0846, + "grad_norm": 1.7338367700576782, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0014, + "grad_norm": 0.1934671550989151, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0443, + "grad_norm": 1.1740210056304932, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0016, + "grad_norm": 0.221791610121727, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0419, + "grad_norm": 1.0604463815689087, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0018, + "grad_norm": 0.2774617373943329, + "learning_rate": 5.28e-06, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0715, + "grad_norm": 1.4584964513778687, + "learning_rate": 5.27e-06, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0424, + "grad_norm": 1.1874643564224243, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0681, + "grad_norm": 1.1877933740615845, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0574, + "grad_norm": 1.2860503196716309, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0024, + "grad_norm": 0.38671889901161194, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0778, + "grad_norm": 1.683851718902588, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0624, + "grad_norm": 1.148560643196106, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0026, + "grad_norm": 0.422258198261261, + "learning_rate": 5.2e-06, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0029, + "grad_norm": 0.48346948623657227, + "learning_rate": 5.19e-06, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.003, + "grad_norm": 0.4990505874156952, + "learning_rate": 5.18e-06, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0444, + "grad_norm": 1.1750332117080688, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0631, + "grad_norm": 1.0927088260650635, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0029, + "grad_norm": 0.491895854473114, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0029, + "grad_norm": 0.48604080080986023, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0646, + "grad_norm": 1.8152271509170532, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0905, + "grad_norm": 2.1916065216064453, + "learning_rate": 5.12e-06, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0441, + "grad_norm": 0.9943680167198181, + "learning_rate": 5.11e-06, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0028, + "grad_norm": 0.4724738299846649, + "learning_rate": 5.1e-06, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0455, + "grad_norm": 1.327681303024292, + "learning_rate": 5.09e-06, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0401, + "grad_norm": 1.00179922580719, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.2741, + "grad_norm": 5.871794700622559, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0028, + "grad_norm": 0.48077592253685, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0706, + "grad_norm": 1.4320826530456543, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.0435, + "grad_norm": 1.2258262634277344, + "learning_rate": 5.04e-06, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0031, + "grad_norm": 0.5447593331336975, + "learning_rate": 5.03e-06, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0408, + "grad_norm": 1.0005323886871338, + "learning_rate": 5.02e-06, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0031, + "grad_norm": 0.52440345287323, + "learning_rate": 5.01e-06, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0931, + "grad_norm": 2.2890543937683105, + "learning_rate": 5e-06, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0028, + "grad_norm": 0.47974297404289246, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0028, + "grad_norm": 0.4712013900279999, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0734, + "grad_norm": 1.7330412864685059, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.0412, + "grad_norm": 1.2318421602249146, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0577, + "grad_norm": 1.1624799966812134, + "learning_rate": 4.95e-06, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0667, + "grad_norm": 1.3667885065078735, + "learning_rate": 4.94e-06, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0472, + "grad_norm": 1.0038102865219116, + "learning_rate": 4.93e-06, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0662, + "grad_norm": 1.370149850845337, + "learning_rate": 4.92e-06, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.003, + "grad_norm": 0.4965730309486389, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0397, + "grad_norm": 0.9282152056694031, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0576, + "grad_norm": 1.0276484489440918, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0656, + "grad_norm": 1.319326400756836, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0636, + "grad_norm": 1.2873133420944214, + "learning_rate": 4.87e-06, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.0032, + "grad_norm": 0.5650099515914917, + "learning_rate": 4.86e-06, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0404, + "grad_norm": 1.389515995979309, + "learning_rate": 4.85e-06, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0036, + "grad_norm": 0.6158953309059143, + "learning_rate": 4.84e-06, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0823, + "grad_norm": 2.242391347885132, + "learning_rate": 4.83e-06, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0653, + "grad_norm": 1.5677355527877808, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0781, + "grad_norm": 2.0974771976470947, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0611, + "grad_norm": 1.4084426164627075, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0044, + "grad_norm": 0.7955360412597656, + "learning_rate": 4.79e-06, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0352, + "grad_norm": 0.9566419124603271, + "learning_rate": 4.78e-06, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0564, + "grad_norm": 0.9539786577224731, + "learning_rate": 4.77e-06, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0459, + "grad_norm": 1.0773917436599731, + "learning_rate": 4.76e-06, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.075, + "grad_norm": 2.423198938369751, + "learning_rate": 4.75e-06, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0044, + "grad_norm": 0.7832935452461243, + "learning_rate": 4.74e-06, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0661, + "grad_norm": 1.3831069469451904, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.0043, + "grad_norm": 0.7653414011001587, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0039, + "grad_norm": 0.7014725208282471, + "learning_rate": 4.71e-06, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0042, + "grad_norm": 0.7603307962417603, + "learning_rate": 4.7e-06, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0622, + "grad_norm": 1.3033061027526855, + "learning_rate": 4.69e-06, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0774, + "grad_norm": 2.0244553089141846, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0035, + "grad_norm": 0.6342400908470154, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0031, + "grad_norm": 0.5407992601394653, + "learning_rate": 4.66e-06, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0611, + "grad_norm": 1.2235374450683594, + "learning_rate": 4.65e-06, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0623, + "grad_norm": 1.3751453161239624, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0027, + "grad_norm": 0.4813397526741028, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.0664, + "grad_norm": 1.2894669771194458, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.056, + "grad_norm": 1.4559017419815063, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0775, + "grad_norm": 2.593362808227539, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.3138, + "grad_norm": 5.148370742797852, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0756, + "grad_norm": 2.2736735343933105, + "learning_rate": 4.58e-06, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.065, + "grad_norm": 3.2683534622192383, + "learning_rate": 4.57e-06, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0025, + "grad_norm": 0.44800934195518494, + "learning_rate": 4.56e-06, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.2697, + "grad_norm": 5.550428867340088, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0566, + "grad_norm": 1.0541280508041382, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0021, + "grad_norm": 0.3617427945137024, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0473, + "grad_norm": 1.3375787734985352, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0021, + "grad_norm": 0.33384522795677185, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0379, + "grad_norm": 1.0544806718826294, + "learning_rate": 4.5e-06, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0023, + "grad_norm": 0.39406508207321167, + "learning_rate": 4.49e-06, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0752, + "grad_norm": 1.9515206813812256, + "learning_rate": 4.48e-06, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.0023, + "grad_norm": 0.3835340738296509, + "learning_rate": 4.47e-06, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.059, + "grad_norm": 1.1221628189086914, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0021, + "grad_norm": 0.3509887456893921, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.064, + "grad_norm": 1.205573320388794, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0718, + "grad_norm": 2.1418721675872803, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0414, + "grad_norm": 1.3037139177322388, + "learning_rate": 4.42e-06, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.0736, + "grad_norm": 2.1680147647857666, + "learning_rate": 4.41e-06, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0021, + "grad_norm": 0.347339004278183, + "learning_rate": 4.4e-06, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0736, + "grad_norm": 2.0864803791046143, + "learning_rate": 4.39e-06, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0025, + "grad_norm": 0.4395049810409546, + "learning_rate": 4.38e-06, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0023, + "grad_norm": 0.39004504680633545, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0022, + "grad_norm": 0.36095598340034485, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0582, + "grad_norm": 1.2327930927276611, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0461, + "grad_norm": 1.040818452835083, + "learning_rate": 4.34e-06, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.248, + "grad_norm": 5.55968713760376, + "learning_rate": 4.33e-06, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0021, + "grad_norm": 0.33996713161468506, + "learning_rate": 4.32e-06, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0885, + "grad_norm": 1.9103176593780518, + "learning_rate": 4.31e-06, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0021, + "grad_norm": 0.3596363663673401, + "learning_rate": 4.3e-06, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0024, + "grad_norm": 0.38911113142967224, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.0575, + "grad_norm": 1.1043959856033325, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.0398, + "grad_norm": 1.0082714557647705, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.07, + "grad_norm": 1.312532901763916, + "learning_rate": 4.26e-06, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.0019, + "grad_norm": 0.314879834651947, + "learning_rate": 4.25e-06, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.32559505105018616, + "learning_rate": 4.24e-06, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0021, + "grad_norm": 0.3332079350948334, + "learning_rate": 4.23e-06, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0585, + "grad_norm": 1.1406902074813843, + "learning_rate": 4.22e-06, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0018, + "grad_norm": 0.2799522876739502, + "learning_rate": 4.21e-06, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0525, + "grad_norm": 1.1263917684555054, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0019, + "grad_norm": 0.28769129514694214, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.002, + "grad_norm": 0.3043234348297119, + "learning_rate": 4.18e-06, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0018, + "grad_norm": 0.2788783311843872, + "learning_rate": 4.17e-06, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.002, + "grad_norm": 0.3088054358959198, + "learning_rate": 4.16e-06, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.0382, + "grad_norm": 1.0789445638656616, + "learning_rate": 4.15e-06, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.0435, + "grad_norm": 1.0291471481323242, + "learning_rate": 4.14e-06, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0754, + "grad_norm": 1.4396899938583374, + "learning_rate": 4.13e-06, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.05, + "grad_norm": 1.1235865354537964, + "learning_rate": 4.12e-06, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0018, + "grad_norm": 0.2745732069015503, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0017, + "grad_norm": 0.2619018256664276, + "learning_rate": 4.1e-06, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.063, + "grad_norm": 1.068122148513794, + "learning_rate": 4.09e-06, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.076, + "grad_norm": 1.5099190473556519, + "learning_rate": 4.08e-06, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.075, + "grad_norm": 1.370004415512085, + "learning_rate": 4.07e-06, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.06, + "grad_norm": 1.2732493877410889, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.045, + "grad_norm": 1.2496861219406128, + "learning_rate": 4.05e-06, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0471, + "grad_norm": 1.1135365962982178, + "learning_rate": 4.04e-06, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0668, + "grad_norm": 1.5768578052520752, + "learning_rate": 4.03e-06, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0024, + "grad_norm": 0.3887575566768646, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0023, + "grad_norm": 0.3817980885505676, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.2858, + "grad_norm": 5.93766975402832, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757269084453583, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0611, + "grad_norm": 1.3149932622909546, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.085, + "grad_norm": 1.8090168237686157, + "learning_rate": 3.97e-06, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0624, + "grad_norm": 1.2021411657333374, + "learning_rate": 3.96e-06, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0597, + "grad_norm": 1.1230809688568115, + "learning_rate": 3.95e-06, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0521, + "grad_norm": 1.225655198097229, + "learning_rate": 3.94e-06, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0028, + "grad_norm": 0.4546661674976349, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.2426, + "grad_norm": 4.83814001083374, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0032, + "grad_norm": 0.5268356800079346, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.003, + "grad_norm": 0.5073143839836121, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0571, + "grad_norm": 1.12201988697052, + "learning_rate": 3.89e-06, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0027, + "grad_norm": 0.441703200340271, + "learning_rate": 3.88e-06, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.06, + "grad_norm": 1.055845022201538, + "learning_rate": 3.87e-06, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0026, + "grad_norm": 0.4252733290195465, + "learning_rate": 3.86e-06, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0654, + "grad_norm": 1.2097599506378174, + "learning_rate": 3.85e-06, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0031, + "grad_norm": 0.5153416395187378, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0412, + "grad_norm": 1.2524850368499756, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0603, + "grad_norm": 1.216737985610962, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0027, + "grad_norm": 0.4374849498271942, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0027, + "grad_norm": 0.45386913418769836, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0772, + "grad_norm": 2.3643293380737305, + "learning_rate": 3.79e-06, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0585, + "grad_norm": 1.1927247047424316, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0024, + "grad_norm": 0.4038313329219818, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0024, + "grad_norm": 0.3948758542537689, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0022, + "grad_norm": 0.36720144748687744, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0024, + "grad_norm": 0.3845508098602295, + "learning_rate": 3.74e-06, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0021, + "grad_norm": 0.33976465463638306, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0656, + "grad_norm": 1.0829418897628784, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0816, + "grad_norm": 1.7684704065322876, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0021, + "grad_norm": 0.3379213809967041, + "learning_rate": 3.7e-06, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0017, + "grad_norm": 0.268597275018692, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0571, + "grad_norm": 1.7145894765853882, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0017, + "grad_norm": 0.262333482503891, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0453, + "grad_norm": 1.0645833015441895, + "learning_rate": 3.66e-06, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0596, + "grad_norm": 1.364123821258545, + "learning_rate": 3.65e-06, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0472, + "grad_norm": 0.9277791380882263, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.062, + "grad_norm": 1.2970867156982422, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0486, + "grad_norm": 1.1752419471740723, + "learning_rate": 3.62e-06, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.067, + "grad_norm": 1.646427869796753, + "learning_rate": 3.61e-06, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.0488, + "grad_norm": 1.3798638582229614, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0585, + "grad_norm": 1.2615973949432373, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0536, + "grad_norm": 1.4801198244094849, + "learning_rate": 3.58e-06, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0021, + "grad_norm": 0.3402940332889557, + "learning_rate": 3.57e-06, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0506, + "grad_norm": 0.878396213054657, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0022, + "grad_norm": 0.37959179282188416, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0023, + "grad_norm": 0.39978647232055664, + "learning_rate": 3.54e-06, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.0692, + "grad_norm": 1.6479856967926025, + "learning_rate": 3.53e-06, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0022, + "grad_norm": 0.37655898928642273, + "learning_rate": 3.52e-06, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0547, + "grad_norm": 1.4809867143630981, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.038, + "grad_norm": 1.2819538116455078, + "learning_rate": 3.5e-06, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0437, + "grad_norm": 1.2474430799484253, + "learning_rate": 3.49e-06, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0611, + "grad_norm": 1.1493180990219116, + "learning_rate": 3.48e-06, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.062, + "grad_norm": 1.4344936609268188, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0027, + "grad_norm": 0.501312255859375, + "learning_rate": 3.46e-06, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.003, + "grad_norm": 0.57524174451828, + "learning_rate": 3.45e-06, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.003, + "grad_norm": 0.546630322933197, + "learning_rate": 3.44e-06, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0028, + "grad_norm": 0.5239407420158386, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0395, + "grad_norm": 0.8654681444168091, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.0399, + "grad_norm": 0.9791849851608276, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0714, + "grad_norm": 1.4680542945861816, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0029, + "grad_norm": 0.5489619970321655, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0652, + "grad_norm": 1.445259690284729, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.0031, + "grad_norm": 0.554716944694519, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0655, + "grad_norm": 1.0966905355453491, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0494, + "grad_norm": 1.049824833869934, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0591, + "grad_norm": 1.8449171781539917, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.003, + "grad_norm": 0.5422641634941101, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.0805, + "grad_norm": 1.8794130086898804, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.0481, + "grad_norm": 0.9934747219085693, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0497, + "grad_norm": 1.2348871231079102, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0444, + "grad_norm": 1.1614453792572021, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0388, + "grad_norm": 1.22681725025177, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0032, + "grad_norm": 0.5757941603660583, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0034, + "grad_norm": 0.611791729927063, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0616, + "grad_norm": 1.136299967765808, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0433, + "grad_norm": 1.2018715143203735, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.042, + "grad_norm": 1.0409917831420898, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.044, + "grad_norm": 1.2323369979858398, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0034, + "grad_norm": 0.6153194904327393, + "learning_rate": 3.21e-06, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0034, + "grad_norm": 0.6106674671173096, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.0639, + "grad_norm": 1.089705467224121, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0692, + "grad_norm": 1.5026510953903198, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0637, + "grad_norm": 1.383870005607605, + "learning_rate": 3.17e-06, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0032, + "grad_norm": 0.568756639957428, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.0413, + "grad_norm": 1.2440272569656372, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.039, + "grad_norm": 1.180145025253296, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0033, + "grad_norm": 0.6265860795974731, + "learning_rate": 3.13e-06, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0033, + "grad_norm": 0.5880522727966309, + "learning_rate": 3.12e-06, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0032, + "grad_norm": 0.5984041690826416, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0557, + "grad_norm": 1.0321638584136963, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0585, + "grad_norm": 1.1382465362548828, + "learning_rate": 3.09e-06, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0032, + "grad_norm": 0.5756648778915405, + "learning_rate": 3.08e-06, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.003, + "grad_norm": 0.5428857207298279, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0774, + "grad_norm": 1.805572271347046, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0569, + "grad_norm": 1.139460563659668, + "learning_rate": 3.05e-06, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.0426, + "grad_norm": 1.383743405342102, + "learning_rate": 3.04e-06, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0024, + "grad_norm": 0.4358248710632324, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0397, + "grad_norm": 1.0429037809371948, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0457, + "grad_norm": 1.3951339721679688, + "learning_rate": 3.01e-06, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0027, + "grad_norm": 0.47018593549728394, + "learning_rate": 3e-06, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0731, + "grad_norm": 1.9685642719268799, + "learning_rate": 2.99e-06, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.0026, + "grad_norm": 0.45238158106803894, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0024, + "grad_norm": 0.40610402822494507, + "learning_rate": 2.97e-06, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0525, + "grad_norm": 1.0180531740188599, + "learning_rate": 2.96e-06, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0436, + "grad_norm": 1.2175544500350952, + "learning_rate": 2.95e-06, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.0601, + "grad_norm": 1.2007901668548584, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0566, + "grad_norm": 1.2265726327896118, + "learning_rate": 2.93e-06, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0556, + "grad_norm": 1.1947659254074097, + "learning_rate": 2.92e-06, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0027, + "grad_norm": 0.464779794216156, + "learning_rate": 2.91e-06, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.0026, + "grad_norm": 0.4438534080982208, + "learning_rate": 2.9e-06, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0593, + "grad_norm": 1.0972975492477417, + "learning_rate": 2.89e-06, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0835, + "grad_norm": 1.884253978729248, + "learning_rate": 2.88e-06, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0633, + "grad_norm": 1.0084459781646729, + "learning_rate": 2.87e-06, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0558, + "grad_norm": 1.0302374362945557, + "learning_rate": 2.86e-06, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0542, + "grad_norm": 0.9511706829071045, + "learning_rate": 2.85e-06, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0506, + "grad_norm": 1.4875551462173462, + "learning_rate": 2.84e-06, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0596, + "grad_norm": 1.1406636238098145, + "learning_rate": 2.83e-06, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0843, + "grad_norm": 1.663854718208313, + "learning_rate": 2.82e-06, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.003, + "grad_norm": 0.5147997140884399, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0862, + "grad_norm": 1.6565779447555542, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0031, + "grad_norm": 0.5479184985160828, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0444, + "grad_norm": 1.354533076286316, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0031, + "grad_norm": 0.5383754968643188, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0405, + "grad_norm": 1.1847655773162842, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0686, + "grad_norm": 1.8093054294586182, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0599, + "grad_norm": 0.9621073603630066, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.6532343626022339, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.062, + "grad_norm": 1.1963555812835693, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0471, + "grad_norm": 1.2936190366744995, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0039, + "grad_norm": 0.6896610856056213, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.0035, + "grad_norm": 0.619045615196228, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.0037, + "grad_norm": 0.6495220065116882, + "learning_rate": 2.68e-06, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850738286972046, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0394, + "grad_norm": 1.1021217107772827, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.003, + "grad_norm": 0.5251200795173645, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0029, + "grad_norm": 0.5125622153282166, + "learning_rate": 2.64e-06, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0829, + "grad_norm": 1.8204774856567383, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.0624, + "grad_norm": 1.3469654321670532, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0587, + "grad_norm": 1.1263304948806763, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0791, + "grad_norm": 2.308769941329956, + "learning_rate": 2.6e-06, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0025, + "grad_norm": 0.42390695214271545, + "learning_rate": 2.59e-06, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0025, + "grad_norm": 0.4351828694343567, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0025, + "grad_norm": 0.45117858052253723, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.002, + "grad_norm": 0.3449709117412567, + "learning_rate": 2.56e-06, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0552, + "grad_norm": 1.02012038230896, + "learning_rate": 2.55e-06, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0021, + "grad_norm": 0.35598093271255493, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0706, + "grad_norm": 1.9882680177688599, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0585, + "grad_norm": 1.1153826713562012, + "learning_rate": 2.52e-06, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0606, + "grad_norm": 1.6919127702713013, + "learning_rate": 2.51e-06, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.0381, + "grad_norm": 0.9558757543563843, + "learning_rate": 2.5e-06, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0021, + "grad_norm": 0.3558536469936371, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0522, + "grad_norm": 1.5039445161819458, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0762, + "grad_norm": 1.8451253175735474, + "learning_rate": 2.47e-06, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0021, + "grad_norm": 0.3580801486968994, + "learning_rate": 2.46e-06, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0596, + "grad_norm": 1.0082149505615234, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0019, + "grad_norm": 0.31669387221336365, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0021, + "grad_norm": 0.3432970345020294, + "learning_rate": 2.43e-06, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0574, + "grad_norm": 1.3162227869033813, + "learning_rate": 2.42e-06, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0435, + "grad_norm": 1.0670703649520874, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0461, + "grad_norm": 1.2668665647506714, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0594, + "grad_norm": 1.4527745246887207, + "learning_rate": 2.39e-06, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.002, + "grad_norm": 0.3514978885650635, + "learning_rate": 2.38e-06, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0729, + "grad_norm": 2.0161454677581787, + "learning_rate": 2.37e-06, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.38664510846138, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0353, + "grad_norm": 0.9888522624969482, + "learning_rate": 2.35e-06, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0816, + "grad_norm": 1.6845252513885498, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.002, + "grad_norm": 0.34472399950027466, + "learning_rate": 2.33e-06, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0612, + "grad_norm": 1.5795350074768066, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.036, + "grad_norm": 1.0923341512680054, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0021, + "grad_norm": 0.36445900797843933, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0021, + "grad_norm": 0.36632096767425537, + "learning_rate": 2.29e-06, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0024, + "grad_norm": 0.4193936884403229, + "learning_rate": 2.28e-06, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0021, + "grad_norm": 0.36693835258483887, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0695, + "grad_norm": 1.6587837934494019, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0439, + "grad_norm": 1.2197368144989014, + "learning_rate": 2.25e-06, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.0737, + "grad_norm": 1.8300983905792236, + "learning_rate": 2.24e-06, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0443, + "grad_norm": 1.1544647216796875, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0023, + "grad_norm": 0.40331411361694336, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.0024, + "grad_norm": 0.4283469021320343, + "learning_rate": 2.21e-06, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0023, + "grad_norm": 0.38760119676589966, + "learning_rate": 2.2e-06, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0768, + "grad_norm": 2.4320685863494873, + "learning_rate": 2.19e-06, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.0022, + "grad_norm": 0.3753429353237152, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.0022, + "grad_norm": 0.37054023146629333, + "learning_rate": 2.17e-06, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.063, + "grad_norm": 1.1455004215240479, + "learning_rate": 2.16e-06, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.002, + "grad_norm": 0.3473651707172394, + "learning_rate": 2.15e-06, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0613, + "grad_norm": 1.3616305589675903, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0728, + "grad_norm": 1.4589122533798218, + "learning_rate": 2.13e-06, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0021, + "grad_norm": 0.3479214906692505, + "learning_rate": 2.12e-06, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0652, + "grad_norm": 1.3161977529525757, + "learning_rate": 2.11e-06, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0019, + "grad_norm": 0.30886292457580566, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0592, + "grad_norm": 1.1527003049850464, + "learning_rate": 2.09e-06, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0019, + "grad_norm": 0.32701927423477173, + "learning_rate": 2.08e-06, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0019, + "grad_norm": 0.31851011514663696, + "learning_rate": 2.07e-06, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0019, + "grad_norm": 0.3128160238265991, + "learning_rate": 2.06e-06, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0609, + "grad_norm": 1.4082930088043213, + "learning_rate": 2.05e-06, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0367, + "grad_norm": 1.014041781425476, + "learning_rate": 2.04e-06, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0018, + "grad_norm": 0.31275689601898193, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0651, + "grad_norm": 1.7855079174041748, + "learning_rate": 2.02e-06, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0019, + "grad_norm": 0.3344590663909912, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0647, + "grad_norm": 1.4787598848342896, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0578, + "grad_norm": 1.2822742462158203, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0431, + "grad_norm": 1.270432472229004, + "learning_rate": 1.98e-06, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0629, + "grad_norm": 1.4008212089538574, + "learning_rate": 1.97e-06, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.29254984855651855, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.33816665410995483, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0407, + "grad_norm": 1.2000517845153809, + "learning_rate": 1.94e-06, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0021, + "grad_norm": 0.36089253425598145, + "learning_rate": 1.93e-06, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009200990200043, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0681, + "grad_norm": 1.279045581817627, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.041, + "grad_norm": 0.9949601292610168, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0436, + "grad_norm": 1.0469834804534912, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.07, + "grad_norm": 1.9559322595596313, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.002, + "grad_norm": 0.34342578053474426, + "learning_rate": 1.87e-06, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.0878, + "grad_norm": 1.9412786960601807, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.002, + "grad_norm": 0.32897070050239563, + "learning_rate": 1.85e-06, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0558, + "grad_norm": 1.230363368988037, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0021, + "grad_norm": 0.36400625109672546, + "learning_rate": 1.83e-06, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0836, + "grad_norm": 2.0716917514801025, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0621, + "grad_norm": 1.304250717163086, + "learning_rate": 1.81e-06, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0021, + "grad_norm": 0.36326804757118225, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0021, + "grad_norm": 0.35329553484916687, + "learning_rate": 1.79e-06, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.37259048223495483, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0427, + "grad_norm": 1.4227620363235474, + "learning_rate": 1.77e-06, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.0019, + "grad_norm": 0.3209492564201355, + "learning_rate": 1.76e-06, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.0461, + "grad_norm": 1.0381195545196533, + "learning_rate": 1.75e-06, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.042, + "grad_norm": 1.2007672786712646, + "learning_rate": 1.74e-06, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0021, + "grad_norm": 0.36294040083885193, + "learning_rate": 1.73e-06, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0021, + "grad_norm": 0.36834561824798584, + "learning_rate": 1.72e-06, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0571, + "grad_norm": 1.3143699169158936, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0019, + "grad_norm": 0.3313964307308197, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.002, + "grad_norm": 0.357883095741272, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3507683277130127, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0019, + "grad_norm": 0.32915839552879333, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.055, + "grad_norm": 1.478965163230896, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0563, + "grad_norm": 1.0098392963409424, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0018, + "grad_norm": 0.30924662947654724, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0662, + "grad_norm": 1.276971459388733, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0018, + "grad_norm": 0.3022649586200714, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0019, + "grad_norm": 0.32340654730796814, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.038, + "grad_norm": 1.0054205656051636, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.0445, + "grad_norm": 1.2428219318389893, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0531, + "grad_norm": 1.1613452434539795, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0018, + "grad_norm": 0.2842133641242981, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0018, + "grad_norm": 0.3061327040195465, + "learning_rate": 1.56e-06, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0019, + "grad_norm": 0.31931373476982117, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0689, + "grad_norm": 1.777726650238037, + "learning_rate": 1.54e-06, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0626, + "grad_norm": 1.0839914083480835, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0621, + "grad_norm": 1.0777654647827148, + "learning_rate": 1.52e-06, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0617, + "grad_norm": 1.3572564125061035, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0019, + "grad_norm": 0.31615281105041504, + "learning_rate": 1.5e-06, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0584, + "grad_norm": 1.4089421033859253, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0376, + "grad_norm": 0.9989500641822815, + "learning_rate": 1.48e-06, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0564, + "grad_norm": 1.4619941711425781, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0017, + "grad_norm": 0.27881649136543274, + "learning_rate": 1.46e-06, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.0021, + "grad_norm": 0.3606109619140625, + "learning_rate": 1.45e-06, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0018, + "grad_norm": 0.3089398145675659, + "learning_rate": 1.44e-06, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.002, + "grad_norm": 0.35239994525909424, + "learning_rate": 1.43e-06, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.0434, + "grad_norm": 1.028780460357666, + "learning_rate": 1.42e-06, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.055, + "grad_norm": 1.3252202272415161, + "learning_rate": 1.41e-06, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.002, + "grad_norm": 0.34616848826408386, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0021, + "grad_norm": 0.345546156167984, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.041, + "grad_norm": 1.0742279291152954, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.0558, + "grad_norm": 1.3981537818908691, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.3480032682418823, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0414, + "grad_norm": 1.1904889345169067, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0019, + "grad_norm": 0.32626014947891235, + "learning_rate": 1.34e-06, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.0019, + "grad_norm": 0.3311507999897003, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.0417, + "grad_norm": 1.0487819910049438, + "learning_rate": 1.32e-06, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0612, + "grad_norm": 1.482262372970581, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0597, + "grad_norm": 1.0906400680541992, + "learning_rate": 1.3e-06, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0451, + "grad_norm": 1.3021650314331055, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0566, + "grad_norm": 1.1073824167251587, + "learning_rate": 1.28e-06, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0021, + "grad_norm": 0.366703599691391, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0402, + "grad_norm": 1.114858865737915, + "learning_rate": 1.26e-06, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0726, + "grad_norm": 1.9793658256530762, + "learning_rate": 1.25e-06, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0393, + "grad_norm": 1.212233066558838, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.002, + "grad_norm": 0.3448551893234253, + "learning_rate": 1.23e-06, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.002, + "grad_norm": 0.33576035499572754, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0662, + "grad_norm": 1.6050575971603394, + "learning_rate": 1.21e-06, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0361, + "grad_norm": 1.034451961517334, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0022, + "grad_norm": 0.3761736750602722, + "learning_rate": 1.19e-06, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0648, + "grad_norm": 1.8947163820266724, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0556, + "grad_norm": 1.317289113998413, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0441, + "grad_norm": 1.1064449548721313, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0895, + "grad_norm": 1.8790072202682495, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0824, + "grad_norm": 2.2661681175231934, + "learning_rate": 1.14e-06, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.08, + "grad_norm": 2.5085411071777344, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0382, + "grad_norm": 0.8821580410003662, + "learning_rate": 1.12e-06, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.0419, + "grad_norm": 1.2789467573165894, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0661, + "grad_norm": 1.2416129112243652, + "learning_rate": 1.1e-06, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0385, + "grad_norm": 1.19954514503479, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0803, + "grad_norm": 1.7022594213485718, + "learning_rate": 1.08e-06, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0651, + "grad_norm": 1.4528557062149048, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0647, + "grad_norm": 1.2057602405548096, + "learning_rate": 1.06e-06, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0609, + "grad_norm": 1.2766141891479492, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0437, + "grad_norm": 1.1985217332839966, + "learning_rate": 1.04e-06, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0571, + "grad_norm": 1.1973105669021606, + "learning_rate": 1.03e-06, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0664, + "grad_norm": 1.5751904249191284, + "learning_rate": 1.02e-06, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0436, + "grad_norm": 1.0939377546310425, + "learning_rate": 1.01e-06, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0031, + "grad_norm": 0.5472993850708008, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 650257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0595, + "grad_norm": 1.3305593729019165, + "learning_rate": 9.9e-07, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0391, + "grad_norm": 1.123191475868225, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0032, + "grad_norm": 0.5546753406524658, + "learning_rate": 9.7e-07, + "num_tokens": 651372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0031, + "grad_norm": 0.5491161942481995, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 651463.0, + "mean_token_accuracy": 1.0, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.0687, + "grad_norm": 2.234290599822998, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0586, + "grad_norm": 1.2323557138442993, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0557, + "grad_norm": 1.1316601037979126, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.0399, + "grad_norm": 1.354643702507019, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0032, + "grad_norm": 0.5774580836296082, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.2131, + "grad_norm": 5.501800537109375, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0552, + "grad_norm": 1.1691670417785645, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0571, + "grad_norm": 1.3334885835647583, + "learning_rate": 8.8e-07, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0033, + "grad_norm": 0.5850784778594971, + "learning_rate": 8.7e-07, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0751, + "grad_norm": 2.8085896968841553, + "learning_rate": 8.6e-07, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0805, + "grad_norm": 1.9259722232818604, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0404, + "grad_norm": 1.23832106590271, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0566, + "grad_norm": 1.0702412128448486, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0608, + "grad_norm": 1.4386783838272095, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0592, + "grad_norm": 1.2550030946731567, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0434, + "grad_norm": 1.8757680654525757, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.2038, + "grad_norm": 4.9877095222473145, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0037, + "grad_norm": 0.6778392791748047, + "learning_rate": 7.8e-07, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.048, + "grad_norm": 1.6256376504898071, + "learning_rate": 7.7e-07, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.0561, + "grad_norm": 1.4658511877059937, + "learning_rate": 7.6e-07, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.071, + "grad_norm": 1.7589434385299683, + "learning_rate": 7.5e-07, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.0403, + "grad_norm": 1.2130093574523926, + "learning_rate": 7.4e-07, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599217891693115, + "learning_rate": 7.3e-07, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0584, + "grad_norm": 1.2125273942947388, + "learning_rate": 7.2e-07, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0039, + "grad_norm": 0.6885141730308533, + "learning_rate": 7.1e-07, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.056, + "grad_norm": 1.233972430229187, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.004, + "grad_norm": 0.7142868041992188, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0614, + "grad_norm": 1.4658222198486328, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0493, + "grad_norm": 1.051007866859436, + "learning_rate": 6.7e-07, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0409, + "grad_norm": 1.2317217588424683, + "learning_rate": 6.6e-07, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.004, + "grad_norm": 0.7169041633605957, + "learning_rate": 6.5e-07, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0393, + "grad_norm": 1.290911316871643, + "learning_rate": 6.4e-07, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.043, + "grad_norm": 1.550564169883728, + "learning_rate": 6.3e-07, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.044, + "grad_norm": 1.1559568643569946, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0761, + "grad_norm": 1.5238863229751587, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0479, + "grad_norm": 1.310771107673645, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0463, + "grad_norm": 1.120958924293518, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.0039, + "grad_norm": 0.6784827709197998, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0672, + "grad_norm": 1.386460542678833, + "learning_rate": 5.7e-07, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0453, + "grad_norm": 1.2751063108444214, + "learning_rate": 5.6e-07, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.062, + "grad_norm": 1.0763590335845947, + "learning_rate": 5.5e-07, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0571, + "grad_norm": 1.2678844928741455, + "learning_rate": 5.4e-07, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.004, + "grad_norm": 0.7198203802108765, + "learning_rate": 5.3e-07, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0442, + "grad_norm": 1.2891501188278198, + "learning_rate": 5.2e-07, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0039, + "grad_norm": 0.6999010443687439, + "learning_rate": 5.1e-07, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.004, + "grad_norm": 0.7249695658683777, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0781, + "grad_norm": 1.6599754095077515, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0038, + "grad_norm": 0.6885353922843933, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0568, + "grad_norm": 1.6591845750808716, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0038, + "grad_norm": 0.6629458069801331, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0553, + "grad_norm": 1.0831410884857178, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.065, + "grad_norm": 1.709847331047058, + "learning_rate": 4.4e-07, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0446, + "grad_norm": 1.2094167470932007, + "learning_rate": 4.3e-07, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0585, + "grad_norm": 1.23978853225708, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0039, + "grad_norm": 0.6842091083526611, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0603, + "grad_norm": 1.337598204612732, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.004, + "grad_norm": 0.7296668291091919, + "learning_rate": 3.9e-07, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0038, + "grad_norm": 0.6806443333625793, + "learning_rate": 3.8e-07, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0038, + "grad_norm": 0.6828562021255493, + "learning_rate": 3.7e-07, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0667, + "grad_norm": 1.748108148574829, + "learning_rate": 3.6e-07, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0386, + "grad_norm": 1.3246146440505981, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0038, + "grad_norm": 0.6706036329269409, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0552, + "grad_norm": 1.2772272825241089, + "learning_rate": 3.3e-07, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0596, + "grad_norm": 1.3164302110671997, + "learning_rate": 3.2e-07, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0582, + "grad_norm": 1.3520668745040894, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0547, + "grad_norm": 1.2490239143371582, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0387, + "grad_norm": 1.1652135848999023, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0626, + "grad_norm": 1.9845855236053467, + "learning_rate": 2.8e-07, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0038, + "grad_norm": 0.6789660453796387, + "learning_rate": 2.7e-07, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.0037, + "grad_norm": 0.678180456161499, + "learning_rate": 2.6e-07, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0038, + "grad_norm": 0.6906817555427551, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0516, + "grad_norm": 1.1001511812210083, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0037, + "grad_norm": 0.6647882461547852, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.0627, + "grad_norm": 1.4906483888626099, + "learning_rate": 2.2e-07, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0653, + "grad_norm": 1.6483995914459229, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0542, + "grad_norm": 1.1732497215270996, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0035, + "grad_norm": 0.6123244762420654, + "learning_rate": 1.9e-07, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0628, + "grad_norm": 3.3254270553588867, + "learning_rate": 1.8e-07, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.0409, + "grad_norm": 1.0730781555175781, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0034, + "grad_norm": 0.5923974514007568, + "learning_rate": 1.6e-07, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.033, + "grad_norm": 1.07072114944458, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0563, + "grad_norm": 1.1191027164459229, + "learning_rate": 1.4e-07, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0034, + "grad_norm": 0.6199093461036682, + "learning_rate": 1.3e-07, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0497, + "grad_norm": 1.2205955982208252, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0553, + "grad_norm": 1.2247557640075684, + "learning_rate": 1.1e-07, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.0615, + "grad_norm": 1.5119178295135498, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0036, + "grad_norm": 0.6369652152061462, + "learning_rate": 9e-08, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0409, + "grad_norm": 1.2765092849731445, + "learning_rate": 8e-08, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0446, + "grad_norm": 1.0794225931167603, + "learning_rate": 7e-08, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0037, + "grad_norm": 0.6602066159248352, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0637, + "grad_norm": 1.4354852437973022, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.0037, + "grad_norm": 0.6749649047851562, + "learning_rate": 4e-08, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518, + "epoch": 1.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..cf39b39eacfc4a0eb4375b757c1d2cdd829d1bbd --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 372.1845, + "train_loss": 0.18184852770145518, + "train_metrics": { + "train_runtime": 372.1845, + "train_samples_per_second": 5.374, + "train_steps_per_second": 5.374, + "total_flos": 1.1456146931712e+16, + "train_loss": 0.18184852770145518 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..67d00756e92a5f7b983ca1856d58db24059c3fad --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/submission_summary.json @@ -0,0 +1,376 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/training_space_runtime_status.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/training_space_runtime_status.json new file mode 100644 index 0000000000000000000000000000000000000000..6cca9e3b0f08bc37056d674ec0aa0a32a3a62356 --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/reports/training_space_runtime_status.json @@ -0,0 +1,39 @@ +{ + "status": "ok", + "generated_at_utc": "2026-04-26T05:08:41.819955+00:00", + "space_id": "TheJackBright/polyguard-openenv-training-full", + "artifact_repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "runtime": "SpaceRuntime(stage='PAUSED', hardware=None, requested_hardware='cpu-basic', sleep_time=172800, storage=None, raw={'stage': 'PAUSED', 'hardware': {'current': None, 'requested': 'cpu-basic'}, 'gcTimeout': 172800, 'replicas': {'requested': 1}, 'devMode': False, 'domains': [{'domain': 'thejackbright-polyguard-openenv-training-full.hf.space', 'stage': 'READY'}]})", + "runtime_error": "", + "artifact_error": "", + "artifact_file_count": 83, + "has_usable_active_bundle": true, + "has_full_sweep_artifacts": false, + "run_statuses": { + "qwen-qwen2-5-0-5b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload", + "artifact_files": [] + }, + "qwen-qwen2-5-1-5b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload", + "artifact_files": [] + }, + "qwen-qwen2-5-3b-instruct": { + "sft_training": "artifact_available", + "grpo_training": "not_seen_in_status", + "sft_postsave_inference": "artifact_available", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status", + "artifact_files": [] + } + }, + "interpretation": "The Space is not actively training if runtime contains stage='PAUSED'. Completed stage records are taken from live evidence snapshots when available; missing per-run artifact files mean the full sweep checkpoints/reports are not yet downloadable." +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..67d00756e92a5f7b983ca1856d58db24059c3fad --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/submission_summary.json @@ -0,0 +1,376 @@ +{ + "status": "ok", + "generated_at_unix": 1777179904.792038, + "models": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.19233327957964502, + "sft_train_runtime": 234.6302, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.0856, + "sft_last_loss": 0.0626, + "sft_best_loss": 0.0057, + "sft_last_token_accuracy": 0.9717137813568115, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 1.839, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "remote_completed_pending_artifact_upload", + "grpo_postsave_inference": "remote_completed_pending_artifact_upload", + "policy_ablation": "remote_completed_pending_artifact_upload" + }, + "metrics": { + "sft_train_loss": 0.11515871361242898, + "sft_train_runtime": 483.7085, + "sft_examples_used": 2000, + "sft_history_steps": 4001, + "sft_first_loss": 2.9686, + "sft_last_loss": 0.0681, + "sft_best_loss": 0.0009, + "sft_last_token_accuracy": 0.9726027250289917, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.726, + "sft_avg_latency_seconds": 2.158, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "statuses": { + "sft_training": "artifact_available", + "sft_postsave_inference": "artifact_available", + "grpo_training": "not_seen_in_status", + "grpo_postsave_inference": "not_seen_in_status", + "policy_ablation": "not_seen_in_status" + }, + "metrics": { + "sft_train_loss": 0.18184852770145518, + "sft_train_runtime": 372.1845, + "sft_examples_used": 2000, + "sft_history_steps": 2001, + "sft_first_loss": 3.569, + "sft_last_loss": 0.0037, + "sft_best_loss": 0.0011, + "sft_last_token_accuracy": 1.0, + "sft_valid_rate": 1.0, + "sft_avg_env_reward": 0.762, + "sft_avg_latency_seconds": 2.748, + "grpo_avg_reward": null, + "grpo_history_steps": 0, + "grpo_valid_rate": null, + "grpo_avg_env_reward": null, + "grpo_avg_latency_seconds": null + }, + "files": { + "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", + "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", + "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", + "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "grpo_trl_run.json": "", + "grpo_history.json": "", + "grpo_reward_components.jsonl": "", + "postsave_inference_grpo.json": "", + "grpo_ablation_report.json": "", + "error.json": "" + } + } + ], + "artifact_repo": { + "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts", + "status": "ok", + "files": [ + ".gitattributes", + "usable_model_bundles/local-qwen-0-5b-active-smoke/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin", + "usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_training_cycle/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_auto.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_fallback_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_trl_run_strict_check.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_sweep_summary.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/hf_training_status.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/improvement_report_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/inference_benchmark.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/planner_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/plot_index.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/postsave_inference_smoke.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/risk_train.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/robustness.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/supervisor_grpo.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "usable_model_bundles/local-qwen-0-5b-active-smoke/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json" + ], + "meaningful_file_count": 82, + "error": "" + }, + "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/63acc4b1a4167e78b785814b5de63c5a913f9099", + "training_space_status": { + "status": "running", + "source": "https://thejackbright-polyguard-openenv-training-full.hf.space", + "completed_run_ids": [] + }, + "stage_records": [ + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 257.387, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 4230.645, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 15.201, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 18.461, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-0-5b-instruct", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "label": "Qwen 0.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 3.989, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 454.278, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_training", + "returncode": 0, + "elapsed_seconds": 5118.654, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "sft_postsave_inference", + "returncode": 0, + "elapsed_seconds": 17.128, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "grpo_postsave_inference", + "returncode": 0, + "elapsed_seconds": 21.528, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-1-5b-instruct", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "label": "Qwen 1.5B", + "stage": "policy_ablation", + "returncode": 0, + "elapsed_seconds": 4.001, + "completed": true + }, + { + "run_id": "qwen-qwen2-5-3b-instruct", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "label": "Qwen 3B", + "stage": "sft_training", + "returncode": 0, + "elapsed_seconds": 736.955, + "completed": true + } + ], + "charts": { + "qwen_0_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_training_loss.png", + "qwen_0_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_token_accuracy.png", + "qwen_0_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_sft_learning_rate.png", + "qwen_1_5b_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_training_loss.png", + "qwen_1_5b_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_token_accuracy.png", + "qwen_1_5b_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_1_5b_sft_learning_rate.png", + "qwen-qwen2-5-3b-instruct_sft_training_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_training_loss.png", + "qwen-qwen2-5-3b-instruct_sft_token_accuracy": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_token_accuracy.png", + "qwen-qwen2-5-3b-instruct_sft_learning_rate": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen-qwen2-5-3b-instruct_sft_learning_rate.png", + "qwen_0_5b_vs_1_5b_sft_loss_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png", + "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png", + "qwen_0_5b_1_5b_final_sft_train_loss": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_final_sft_train_loss.png", + "qwen_0_5b_1_5b_postsave_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_reward.png", + "qwen_0_5b_1_5b_postsave_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_postsave_latency.png", + "qwen_0_5b_1_5b_sft_runtime": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_sft_runtime.png", + "qwen_0_5b_1_5b_remote_completed_stage_durations": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/qwen_0_5b_1_5b_remote_completed_stage_durations.png", + "policy_ablation_avg_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_avg_reward.png", + "policy_ablation_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_legality.png", + "policy_ablation_exploit_detection": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/policy_ablation_exploit_detection.png", + "reward_component_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png", + "primary_reward_channel_bars": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/primary_reward_channel_bars.png", + "basic_llm_vs_full_pipeline_reward": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward.png", + "basic_llm_vs_full_pipeline_legality": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_legality.png", + "basic_llm_vs_full_pipeline_latency": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_latency.png", + "basic_llm_vs_full_pipeline_reward_delta_by_seed": "outputs/plots/submission_evidence/qwen_0_5b_1_5b_3b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png" + }, + "pending_artifacts": [ + "Qwen 0.5B grpo_history.json: pending_artifact_upload", + "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 1.5B grpo_history.json: pending_artifact_upload", + "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload", + "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload", + "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload", + "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload", + "Qwen 3B grpo_history.json: pending_artifact_upload", + "Qwen 3B grpo_postsave_inference: not_seen_in_status", + "Qwen 3B grpo_training: not_seen_in_status", + "Qwen 3B policy_ablation: not_seen_in_status", + "Qwen 3B postsave_inference_grpo.json: pending_artifact_upload" + ], + "reward_validation_errors": [], + "primary_judge": "PolyGuard verifier/reward system" +} diff --git a/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..87a57447f1bc35b3d3352ec244b958c47a92d06b --- /dev/null +++ b/docs/results/submission_evidence_qwen_0_5b_1_5b_3b/traces/action_traces.jsonl @@ -0,0 +1,24 @@ +{"seed": 8000, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0261, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "sft_policy", "reward": 0.803, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.842, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.657, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.803}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.657, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8000, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 3.9969, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8001, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "sft_policy", "reward": 0.755, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_02", "action_type": "STOP_DRUG", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.518, "burden_improvement_score": 0.55, "disease_stability_score": 0.58, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.78, "primary_safety_legality": 0.944, "primary_clinical_improvement": 0.549, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.755}, "primary_reward_channels": {"safety_legality": 0.944, "clinical_improvement": 0.549, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8001, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0036, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8002, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8002, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0024, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8003, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8003, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0028, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8004, "policy": "basic_llm", "reward": 0.717, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "exploit_detection", "failure_reasons": ["holdout_ddi_not_addressed"], "anti_cheat_reasons": ["holdout_ddi_not_addressed"], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.675, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.717}, "primary_reward_channels": {"safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0011, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8004, "policy": "full_polyguard_pipeline", "reward": 0.804, "latency_seconds": 0.0027, "legal": true, "candidate_id": "cand_03", "action_type": "REDUCE_DOSE_BUCKET", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.87, "primary_safety_legality": 0.967, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.804}, "primary_reward_channels": {"safety_legality": 0.967, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8005, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0017, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0013, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8005, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.003, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8006, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0014, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8006, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0028, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} +{"seed": 8007, "policy": "basic_llm", "reward": 0.777, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_01", "action_type": "KEEP_REGIMEN", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.7, "primary_safety_legality": 0.924, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.777}, "primary_reward_channels": {"safety_legality": 0.924, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "sft_policy", "reward": 0.831, "latency_seconds": 0.0012, "legal": true, "candidate_id": "cand_04", "action_type": "SUBSTITUTE_WITHIN_CLASS", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.824, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.84, "primary_safety_legality": 0.959, "primary_clinical_improvement": 0.741, "primary_dosing_quality": 0.53, "primary_process_integrity": 0.894, "total_reward": 0.831}, "primary_reward_channels": {"safety_legality": 0.959, "clinical_improvement": 0.741, "dosing_quality": 0.53, "process_integrity": 0.894}} +{"seed": 8007, "policy": "full_polyguard_pipeline", "reward": 0.806, "latency_seconds": 0.0023, "legal": true, "candidate_id": "cand_05", "action_type": "DOSE_HOLD", "termination_reason": "ongoing", "failure_reasons": [], "anti_cheat_reasons": [], "reward_breakdown": {"format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.75, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.999, "uncertainty_calibration_score": 0.92, "primary_safety_legality": 0.979, "primary_clinical_improvement": 0.633, "primary_dosing_quality": 0.655, "primary_process_integrity": 0.894, "total_reward": 0.806}, "primary_reward_channels": {"safety_legality": 0.979, "clinical_improvement": 0.633, "dosing_quality": 0.655, "process_integrity": 0.894}} diff --git a/docs/results/success_rate.png b/docs/results/success_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..b918ae36817cfb351bb924de05a638e1ee4c73c2 Binary files /dev/null and b/docs/results/success_rate.png differ diff --git a/docs/results/supervisor_grpo.json b/docs/results/supervisor_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..9ca005bc384929ba19cdbc4c7dae17badf8e6269 --- /dev/null +++ b/docs/results/supervisor_grpo.json @@ -0,0 +1,28 @@ +{ + "avg_reward": 0.7348571428571429, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.8571428571428572, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.5, + "avg_dosing_quality": 0.5, + "avg_process_fidelity": 0.92, + "exploit_detection_count": 3.0, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.5, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.9000000000000001, + "dosing_quality_score": 0.5, + "abstention_quality_score": 0.56, + "efficiency_score": 0.6427142857142858, + "process_fidelity_score": 0.92, + "explanation_grounding_score": 0.7999999999999999, + "anti_cheat_score": 0.5712857142857143, + "uncertainty_calibration_score": 0.6142857142857144 + } +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c6f0e7743a9470ea6efc3b741694dafed8b57 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 1.839, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.224, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "There are 5 candidates with IDs . The candidate with ID has the lowest prediction score. Therefore, we will choose candidate . Rationale: Candidate has the lowest prediction score among the candidates with IDs , indicating that they are the most likely to", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.748, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 1.737, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 1.736, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..986493a2b78954513d0716891dc5dd71c576c569 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..7d5a627d2519bb0afa7485ff88f51eb813d742e6 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.0856, + "grad_norm": 3.887380838394165, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6647, + "grad_norm": 1.2190884351730347, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7138103246688843, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.1696, + "grad_norm": 0.8276316523551941, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.0464, + "grad_norm": 3.3297364711761475, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5222222208976746, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.1875, + "grad_norm": 0.8076611757278442, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.6105, + "grad_norm": 1.0332727432250977, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7188019752502441, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5834, + "grad_norm": 1.0094527006149292, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.1683, + "grad_norm": 0.7861526012420654, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.3843, + "grad_norm": 0.7377748489379883, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7495107650756836, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.584, + "grad_norm": 0.9443085193634033, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.720465898513794, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.366, + "grad_norm": 0.7967380285263062, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7504892349243164, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.5266, + "grad_norm": 1.0016096830368042, + "learning_rate": 1.989e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2453, + "grad_norm": 0.9283791184425354, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7836938500404358, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.6206, + "grad_norm": 0.9805537462234497, + "learning_rate": 1.987e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7171381115913391, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.5375, + "grad_norm": 0.9191323518753052, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7337770462036133, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.3423, + "grad_norm": 0.7822748422622681, + "learning_rate": 1.985e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 2.9309, + "grad_norm": 2.773752450942993, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5277777910232544, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.1574, + "grad_norm": 0.7265554666519165, + "learning_rate": 1.983e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 2.9093, + "grad_norm": 2.9051146507263184, + "learning_rate": 1.982e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5388888716697693, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.5786, + "grad_norm": 0.9728697538375854, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.722129762172699, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 1.0934, + "grad_norm": 0.7319854497909546, + "learning_rate": 1.98e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.7974559664726257, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.2097, + "grad_norm": 0.8981963992118835, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4816, + "grad_norm": 1.0308023691177368, + "learning_rate": 1.978e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.3218, + "grad_norm": 0.7793745398521423, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7544031143188477, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.4883, + "grad_norm": 1.0108226537704468, + "learning_rate": 1.976e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 1.1398, + "grad_norm": 0.7284001111984253, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7857142686843872, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.5201, + "grad_norm": 0.9933396577835083, + "learning_rate": 1.974e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7354409098625183, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 2.8162, + "grad_norm": 3.1626200675964355, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.31, + "grad_norm": 0.8019158244132996, + "learning_rate": 1.972e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7573385238647461, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 2.7985, + "grad_norm": 3.126246929168701, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.5341, + "grad_norm": 0.952720582485199, + "learning_rate": 1.97e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7271214723587036, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 1.0763, + "grad_norm": 0.7093926668167114, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.2127, + "grad_norm": 0.813561201095581, + "learning_rate": 1.968e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 2.7516, + "grad_norm": 3.1947083473205566, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1881, + "grad_norm": 1.0367817878723145, + "learning_rate": 1.966e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1991, + "grad_norm": 0.9249914288520813, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 1.0422, + "grad_norm": 0.7850101590156555, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.2488, + "grad_norm": 0.8151567578315735, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7651663422584534, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.5095, + "grad_norm": 1.0585670471191406, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.7254575490951538, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.6828, + "grad_norm": 3.3681087493896484, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1754, + "grad_norm": 1.029766321182251, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 1.0827, + "grad_norm": 0.7520174980163574, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.1385, + "grad_norm": 1.012759804725647, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.6322, + "grad_norm": 3.4875218868255615, + "learning_rate": 1.957e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.23, + "grad_norm": 0.9103058576583862, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.4499, + "grad_norm": 1.0566458702087402, + "learning_rate": 1.955e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.1171, + "grad_norm": 1.0389467477798462, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.4262, + "grad_norm": 1.0595616102218628, + "learning_rate": 1.953e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.740432620048523, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.1224, + "grad_norm": 1.0530123710632324, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.5409, + "grad_norm": 3.6781489849090576, + "learning_rate": 1.951e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0942, + "grad_norm": 1.0411880016326904, + "learning_rate": 1.95e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 1.0622, + "grad_norm": 0.8258970975875854, + "learning_rate": 1.949e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.1977, + "grad_norm": 0.8957047462463379, + "learning_rate": 1.948e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7700586915016174, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3695, + "grad_norm": 1.122542142868042, + "learning_rate": 1.947e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.7520798444747925, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.8548, + "grad_norm": 0.7688314914703369, + "learning_rate": 1.946e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0659, + "grad_norm": 1.0568362474441528, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 1.0294, + "grad_norm": 0.8596540689468384, + "learning_rate": 1.944e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.4359, + "grad_norm": 1.2490142583847046, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7321131229400635, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.416, + "grad_norm": 3.7482848167419434, + "learning_rate": 1.942e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0725, + "grad_norm": 1.117326259613037, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.9739, + "grad_norm": 0.8864734768867493, + "learning_rate": 1.94e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.1443, + "grad_norm": 0.9423307776451111, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.7739726305007935, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.8009, + "grad_norm": 0.8988932967185974, + "learning_rate": 1.938e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0508, + "grad_norm": 1.1697311401367188, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2747, + "grad_norm": 1.2967511415481567, + "learning_rate": 1.936e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2796, + "grad_norm": 1.2881773710250854, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7554076313972473, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.3052, + "grad_norm": 4.034823894500732, + "learning_rate": 1.934e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2806, + "grad_norm": 1.3690178394317627, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.1807, + "grad_norm": 1.0886963605880737, + "learning_rate": 1.932e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7632094025611877, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 1.0076, + "grad_norm": 1.3501569032669067, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.801996648311615, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.921, + "grad_norm": 1.0231209993362427, + "learning_rate": 1.93e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.1999, + "grad_norm": 4.47637939453125, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.1852, + "grad_norm": 4.533531188964844, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.1623, + "grad_norm": 4.683750152587891, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2988, + "grad_norm": 1.5087296962738037, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7437604069709778, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.1266, + "grad_norm": 4.944180011749268, + "learning_rate": 1.925e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.605555534362793, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.9762, + "grad_norm": 1.0376505851745605, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.0834, + "grad_norm": 5.394686222076416, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.9309, + "grad_norm": 1.0764528512954712, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.7549, + "grad_norm": 1.089787244796753, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0972, + "grad_norm": 1.2265634536743164, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.7915851473808289, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.0061, + "grad_norm": 5.302765846252441, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.1197, + "grad_norm": 1.216346025466919, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7749511003494263, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.181, + "grad_norm": 1.5846738815307617, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8929, + "grad_norm": 1.1130127906799316, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 1.9339, + "grad_norm": NaN, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1623, + "grad_norm": 1.7714096307754517, + "learning_rate": 1.915e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 1.0203, + "grad_norm": 1.204126000404358, + "learning_rate": 1.914e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.8569, + "grad_norm": 1.2058078050613403, + "learning_rate": 1.913e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.197, + "grad_norm": 1.8821589946746826, + "learning_rate": 1.912e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1908, + "grad_norm": 1.9740996360778809, + "learning_rate": 1.911e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.889, + "grad_norm": 1.5037046670913696, + "learning_rate": 1.91e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8236272931098938, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1821, + "grad_norm": 1.539967656135559, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 1.0278, + "grad_norm": 1.2005809545516968, + "learning_rate": 1.908e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1361, + "grad_norm": 1.8167128562927246, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0977, + "grad_norm": 2.2985150814056396, + "learning_rate": 1.906e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0695, + "grad_norm": 1.590173602104187, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7920132875442505, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1519, + "grad_norm": 1.5389997959136963, + "learning_rate": 1.904e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.1507, + "grad_norm": 1.6002172231674194, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.780366063117981, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 1.0454, + "grad_norm": 1.181969404220581, + "learning_rate": 1.902e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8033267855644226, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.0897, + "grad_norm": 1.832823634147644, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.8593, + "grad_norm": 1.2972052097320557, + "learning_rate": 1.9e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8346379399299622, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9507, + "grad_norm": 1.114174723625183, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8150684833526611, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.8422, + "grad_norm": 1.0837013721466064, + "learning_rate": 1.898e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9674, + "grad_norm": 1.1756479740142822, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.7975, + "grad_norm": 1.3874446153640747, + "learning_rate": 1.896e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0557, + "grad_norm": 1.959272027015686, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0885, + "grad_norm": 1.503557801246643, + "learning_rate": 1.894e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8082, + "grad_norm": 1.470276117324829, + "learning_rate": 1.893e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.5508, + "grad_norm": 6.328886985778809, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6944444179534912, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0059, + "grad_norm": 1.5663049221038818, + "learning_rate": 1.891e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0336, + "grad_norm": 1.4562171697616577, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0438, + "grad_norm": 1.5646629333496094, + "learning_rate": 1.889e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0279, + "grad_norm": 1.513607144355774, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.4402, + "grad_norm": 6.165053367614746, + "learning_rate": 1.887e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7349, + "grad_norm": 1.454982876777649, + "learning_rate": 1.886e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7338, + "grad_norm": 1.9169820547103882, + "learning_rate": 1.885e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7831, + "grad_norm": 1.3472567796707153, + "learning_rate": 1.884e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.028, + "grad_norm": 1.5241106748580933, + "learning_rate": 1.883e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.3458, + "grad_norm": 5.9579386711120605, + "learning_rate": 1.882e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7727, + "grad_norm": 1.444265604019165, + "learning_rate": 1.881e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6351, + "grad_norm": 1.281785488128662, + "learning_rate": 1.88e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.6884, + "grad_norm": 1.6917502880096436, + "learning_rate": 1.879e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.886, + "grad_norm": 1.6544225215911865, + "learning_rate": 1.878e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8286189436912537, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7652, + "grad_norm": 1.2762014865875244, + "learning_rate": 1.877e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.2517, + "grad_norm": 7.621744632720947, + "learning_rate": 1.876e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6909, + "grad_norm": 1.8651930093765259, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9464, + "grad_norm": 2.0513856410980225, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.821963369846344, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.8355, + "grad_norm": 1.3392603397369385, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7124, + "grad_norm": 1.7539966106414795, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.861896812915802, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.1931, + "grad_norm": 7.2109856605529785, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7611111402511597, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.806, + "grad_norm": 1.531593918800354, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8424657583236694, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.7483, + "grad_norm": 1.6686372756958008, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.905, + "grad_norm": 3.809466600418091, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8336106538772583, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.7299, + "grad_norm": 1.7963030338287354, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6384, + "grad_norm": 2.485582113265991, + "learning_rate": 1.866e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8718801736831665, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.5473, + "grad_norm": 1.6607071161270142, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6719, + "grad_norm": 1.6095962524414062, + "learning_rate": 1.864e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8772, + "grad_norm": 1.8398959636688232, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.8352745175361633, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6813, + "grad_norm": 1.754347324371338, + "learning_rate": 1.862e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.8176, + "grad_norm": 1.8010166883468628, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.6013, + "grad_norm": 2.131845712661743, + "learning_rate": 1.86e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.0551, + "grad_norm": 8.797135353088379, + "learning_rate": 1.859e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.8055555820465088, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.8096, + "grad_norm": 1.6665289402008057, + "learning_rate": 1.858e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6237, + "grad_norm": 2.031190872192383, + "learning_rate": 1.857e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.8735440969467163, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.8527, + "grad_norm": 2.5186493396759033, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8386023044586182, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.83, + "grad_norm": 1.5677316188812256, + "learning_rate": 1.855e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8444226980209351, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.6951, + "grad_norm": 3.395341634750366, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.7634, + "grad_norm": 1.658737301826477, + "learning_rate": 1.853e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.6195, + "grad_norm": 1.4803838729858398, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8776907920837402, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6916, + "grad_norm": 1.462860345840454, + "learning_rate": 1.851e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.7854, + "grad_norm": 1.6279668807983398, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.749, + "grad_norm": 1.8625388145446777, + "learning_rate": 1.849e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.6619, + "grad_norm": 1.6320242881774902, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8679060935974121, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9864, + "grad_norm": NaN, + "learning_rate": 1.847e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.8222222328186035, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.7698, + "grad_norm": 2.241466999053955, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.8501, + "grad_norm": 2.594738721847534, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8435940146446228, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.962, + "grad_norm": 10.902610778808594, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.8166666626930237, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.7822, + "grad_norm": 1.6955127716064453, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.7942, + "grad_norm": 2.5727546215057373, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.8074, + "grad_norm": 2.082172155380249, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.6346, + "grad_norm": 1.4917131662368774, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8698630332946777, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.6574, + "grad_norm": 1.7243297100067139, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8659490942955017, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.7782, + "grad_norm": 2.236922264099121, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.7541, + "grad_norm": 2.998671531677246, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.7637, + "grad_norm": 2.231337070465088, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.4918, + "grad_norm": 2.1853654384613037, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.8615, + "grad_norm": 19.52778434753418, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.727, + "grad_norm": 2.8629372119903564, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.6812, + "grad_norm": 2.578798294067383, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.718, + "grad_norm": 2.7950305938720703, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.8269, + "grad_norm": 18.518278121948242, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8333333134651184, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.8122, + "grad_norm": 10.636402130126953, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8500000238418579, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.5631, + "grad_norm": 1.8652675151824951, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.5823, + "grad_norm": 2.174743890762329, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.8825831413269043, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.6878, + "grad_norm": 2.426223039627075, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.4815, + "grad_norm": 2.2111594676971436, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.7905, + "grad_norm": 12.419157981872559, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.6485, + "grad_norm": 2.6929852962493896, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.8851913213729858, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.5821, + "grad_norm": 2.588067054748535, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.8855186104774475, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.5376, + "grad_norm": 2.6413276195526123, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.4776, + "grad_norm": 2.0201733112335205, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.7141, + "grad_norm": 8.398615837097168, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.8611111044883728, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.687, + "grad_norm": 6.920986175537109, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.8777777552604675, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.6518, + "grad_norm": 3.54260516166687, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.6429, + "grad_norm": 4.033841609954834, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.4786, + "grad_norm": 2.4023964405059814, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.5997, + "grad_norm": 2.695603370666504, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.6251, + "grad_norm": 7.4209184646606445, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.6324, + "grad_norm": 10.130674362182617, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.5939, + "grad_norm": 2.6180245876312256, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.873776912689209, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.4098, + "grad_norm": 2.2663474082946777, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.5111, + "grad_norm": 2.2139604091644287, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.8894324898719788, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.4332, + "grad_norm": 2.2271547317504883, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.4893, + "grad_norm": 2.0789742469787598, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.8972602486610413, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.5755, + "grad_norm": 18.601898193359375, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.4635, + "grad_norm": 6.127828598022461, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.900166392326355, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.603, + "grad_norm": 2.668287515640259, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.6088, + "grad_norm": 2.419572353363037, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.5672, + "grad_norm": 3.028404712677002, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.8885191082954407, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.4556, + "grad_norm": 4.009725093841553, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.5269, + "grad_norm": 2.9101243019104004, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.6214, + "grad_norm": 2.7398433685302734, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.5646, + "grad_norm": 2.60606050491333, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.8868552446365356, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.3748, + "grad_norm": 3.7512423992156982, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.597, + "grad_norm": 3.150888442993164, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.511, + "grad_norm": 3.328899383544922, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.491, + "grad_norm": 8.625993728637695, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.4053, + "grad_norm": 2.2067341804504395, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.4192, + "grad_norm": 2.0993006229400635, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.3785, + "grad_norm": 2.821485996246338, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.5336, + "grad_norm": 2.169666051864624, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.8901830315589905, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.5235, + "grad_norm": 3.1590685844421387, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.8835616707801819, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.4736, + "grad_norm": 11.030704498291016, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9055555462837219, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.5599, + "grad_norm": 3.9144341945648193, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.880199670791626, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.5102, + "grad_norm": 2.9705278873443604, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.4821, + "grad_norm": 3.4463229179382324, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.4385, + "grad_norm": 8.850930213928223, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9277777671813965, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.4633, + "grad_norm": 2.936647415161133, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.4098, + "grad_norm": 6.922672271728516, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.5233, + "grad_norm": 2.318746328353882, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.3223, + "grad_norm": 4.281177520751953, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.4973, + "grad_norm": 3.6921546459198, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.8951746821403503, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.4666, + "grad_norm": 3.4926915168762207, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.8968386054039001, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.3519, + "grad_norm": 2.668114423751831, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.4244, + "grad_norm": 2.4111084938049316, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.3912, + "grad_norm": 10.561456680297852, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.949999988079071, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.5091, + "grad_norm": 2.472616672515869, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.4842, + "grad_norm": 2.881739854812622, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.8935108184814453, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.4435, + "grad_norm": 3.2438275814056396, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9118136167526245, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.3527, + "grad_norm": 2.2769415378570557, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.4951, + "grad_norm": 3.046674966812134, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.4926, + "grad_norm": 4.042079925537109, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.8918468952178955, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.4564, + "grad_norm": 4.222212314605713, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9051580429077148, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.3074, + "grad_norm": 3.150768280029297, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.3858, + "grad_norm": 3.456815004348755, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.3352, + "grad_norm": 9.094295501708984, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.4867, + "grad_norm": 3.2864322662353516, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.3303, + "grad_norm": 5.672657012939453, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.4708, + "grad_norm": 3.677504062652588, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.3175, + "grad_norm": 5.829269886016846, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.4315, + "grad_norm": 3.211578130722046, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.3084, + "grad_norm": 5.2650628089904785, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.4516, + "grad_norm": 5.401496887207031, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.4197, + "grad_norm": 3.938694953918457, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.4329, + "grad_norm": 3.4744861125946045, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.90183025598526, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.4525, + "grad_norm": 4.853247165679932, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.2768, + "grad_norm": 5.6177144050598145, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.3517, + "grad_norm": 2.8669052124023438, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.4142, + "grad_norm": 3.5590577125549316, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.4307, + "grad_norm": 5.072361946105957, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9101497530937195, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.3981, + "grad_norm": 3.637819528579712, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.912915825843811, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.4344, + "grad_norm": 4.066125869750977, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.3574, + "grad_norm": 4.836447715759277, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.2738, + "grad_norm": 14.006624221801758, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.3416, + "grad_norm": 5.2639079093933105, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.2762, + "grad_norm": 12.536176681518555, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.4114, + "grad_norm": 6.311218738555908, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9090019464492798, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.3912, + "grad_norm": 3.2677178382873535, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.920133113861084, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.3059, + "grad_norm": 4.582422256469727, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.3697, + "grad_norm": 5.214661121368408, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.3486, + "grad_norm": 5.719533920288086, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.328, + "grad_norm": 4.692359924316406, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9363992214202881, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.3665, + "grad_norm": 2.810206174850464, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.2363, + "grad_norm": 6.301739692687988, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.3762, + "grad_norm": 2.9034929275512695, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9168053269386292, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.3573, + "grad_norm": 5.10465669631958, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.3708, + "grad_norm": 2.8359761238098145, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9251247644424438, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.3615, + "grad_norm": 2.6100833415985107, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.3131, + "grad_norm": 3.610330820083618, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.3301, + "grad_norm": 3.1220433712005615, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.2314, + "grad_norm": 7.683000564575195, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.2391, + "grad_norm": 10.635171890258789, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.3934, + "grad_norm": 7.659923076629639, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.3376, + "grad_norm": 5.6293864250183105, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.3734, + "grad_norm": 4.872118949890137, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.2395, + "grad_norm": 3.4475960731506348, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.3513, + "grad_norm": 3.5093634128570557, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.3505, + "grad_norm": 3.436389446258545, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.3041, + "grad_norm": 3.4393298625946045, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.2922, + "grad_norm": 3.826392889022827, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.3414, + "grad_norm": 7.017237663269043, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.3521, + "grad_norm": 4.018287658691406, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.3455, + "grad_norm": 3.9697959423065186, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.3368, + "grad_norm": 3.0641541481018066, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.3244, + "grad_norm": 4.277006149291992, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.353, + "grad_norm": 2.6876814365386963, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.3236, + "grad_norm": 3.7715723514556885, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.3158, + "grad_norm": 3.555406332015991, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.2062, + "grad_norm": 9.316679000854492, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.2002, + "grad_norm": 5.817254543304443, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.2809, + "grad_norm": 5.106694221496582, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.295, + "grad_norm": 7.797866344451904, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.3144, + "grad_norm": 8.002677917480469, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.2345, + "grad_norm": 4.315321445465088, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.306, + "grad_norm": 4.690162181854248, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.3098, + "grad_norm": 4.387345790863037, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.2898, + "grad_norm": 5.204096794128418, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.2894, + "grad_norm": 4.000877380371094, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.3295, + "grad_norm": 5.276703357696533, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9383561611175537, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.2139, + "grad_norm": 2.6593077182769775, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.2077, + "grad_norm": 9.37561321258545, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.2274, + "grad_norm": 2.972815990447998, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.2545, + "grad_norm": 2.4279375076293945, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.2871, + "grad_norm": 2.8517541885375977, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.2877, + "grad_norm": 4.114612102508545, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.2145, + "grad_norm": 14.7569580078125, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9722222089767456, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.294, + "grad_norm": 3.094182252883911, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.2044, + "grad_norm": 3.026052951812744, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.3061, + "grad_norm": 3.1381635665893555, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.2239, + "grad_norm": 2.3573496341705322, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.2853, + "grad_norm": 7.762936115264893, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.2793, + "grad_norm": 7.716437816619873, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.2764, + "grad_norm": 4.531182765960693, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.1807, + "grad_norm": 5.600939750671387, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.1751, + "grad_norm": 6.357442378997803, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.2278, + "grad_norm": 4.381490230560303, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.1693, + "grad_norm": 4.711330413818359, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.2719, + "grad_norm": 7.21658182144165, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.1613, + "grad_norm": 2.806929111480713, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.2236, + "grad_norm": 3.729052782058716, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.3026, + "grad_norm": 3.512017250061035, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.2492, + "grad_norm": 5.842523097991943, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.2591, + "grad_norm": 3.444624662399292, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9442269802093506, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.245, + "grad_norm": 3.560624837875366, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.2493, + "grad_norm": 3.812241792678833, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.1623, + "grad_norm": 9.361125946044922, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.2385, + "grad_norm": 4.130789279937744, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.248, + "grad_norm": 3.7591042518615723, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.2815, + "grad_norm": 6.346067905426025, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.2502, + "grad_norm": 3.433945655822754, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.2994, + "grad_norm": 3.7655599117279053, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9403131008148193, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.2622, + "grad_norm": 3.707118511199951, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.2418, + "grad_norm": 5.776569843292236, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.2278, + "grad_norm": 2.7461037635803223, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.2152, + "grad_norm": 2.729001760482788, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.2093, + "grad_norm": 2.409708261489868, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.2121, + "grad_norm": 4.6761651039123535, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.2645, + "grad_norm": 3.167815685272217, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.1629, + "grad_norm": 12.654186248779297, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.2156, + "grad_norm": 2.461930751800537, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.2281, + "grad_norm": 4.044505596160889, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.2303, + "grad_norm": 3.00589656829834, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.2372, + "grad_norm": 1.9332551956176758, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.2303, + "grad_norm": 3.804724931716919, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.1629, + "grad_norm": 13.47612190246582, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.2276, + "grad_norm": 3.5881187915802, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.2474, + "grad_norm": 3.895529270172119, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.2205, + "grad_norm": 3.4531259536743164, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.2277, + "grad_norm": 3.849405288696289, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.1993, + "grad_norm": 3.522599458694458, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.2291, + "grad_norm": 3.7573893070220947, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1756, + "grad_norm": 4.224817276000977, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.1992, + "grad_norm": 2.2447433471679688, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.184, + "grad_norm": 2.0203311443328857, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.2236, + "grad_norm": 3.499854803085327, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.2141, + "grad_norm": 5.057332992553711, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.232, + "grad_norm": 2.861778974533081, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.184, + "grad_norm": 3.52634596824646, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.2205, + "grad_norm": 2.3115124702453613, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.1838, + "grad_norm": 3.043916940689087, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.1874, + "grad_norm": 3.2404396533966064, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4084, + "grad_norm": 12.86927604675293, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.1677, + "grad_norm": 3.4789700508117676, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.1922, + "grad_norm": 4.1049699783325195, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.1915, + "grad_norm": 3.2055957317352295, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.166, + "grad_norm": 12.477117538452148, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.1799, + "grad_norm": 4.58711051940918, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.2299, + "grad_norm": 2.874641180038452, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.1414, + "grad_norm": 5.157703399658203, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.1812, + "grad_norm": 3.2541451454162598, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.1366, + "grad_norm": 3.705273151397705, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.1681, + "grad_norm": 3.6492865085601807, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.1324, + "grad_norm": 3.3717288970947266, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.1816, + "grad_norm": 4.410749912261963, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3611, + "grad_norm": 11.978804588317871, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9301164746284485, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.1686, + "grad_norm": 2.8153111934661865, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.1293, + "grad_norm": 3.5253026485443115, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.1597, + "grad_norm": 2.9006922245025635, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.1975, + "grad_norm": 6.231935024261475, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.1232, + "grad_norm": 3.3006174564361572, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.1599, + "grad_norm": 3.177495241165161, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.1858, + "grad_norm": 2.967477798461914, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.1725, + "grad_norm": 2.6947214603424072, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.1644, + "grad_norm": 3.6320605278015137, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.1726, + "grad_norm": 6.163839817047119, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.2253, + "grad_norm": 3.695767879486084, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.1295, + "grad_norm": 11.877620697021484, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.1641, + "grad_norm": 2.5848593711853027, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.1299, + "grad_norm": 11.58799934387207, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.153, + "grad_norm": 3.0241589546203613, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.1741, + "grad_norm": 4.446482181549072, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.1517, + "grad_norm": 2.0452992916107178, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.1482, + "grad_norm": 3.511587142944336, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.1673, + "grad_norm": 4.165390968322754, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.1577, + "grad_norm": 2.5295603275299072, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.1444, + "grad_norm": 2.6492788791656494, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.1731, + "grad_norm": 3.1617088317871094, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.1411, + "grad_norm": 2.628790855407715, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.1442, + "grad_norm": 2.589632272720337, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.1647, + "grad_norm": 2.7175090312957764, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.1225, + "grad_norm": 9.854316711425781, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.1635, + "grad_norm": 2.513782501220703, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.1172, + "grad_norm": 4.978464126586914, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.1535, + "grad_norm": 6.545207977294922, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.1554, + "grad_norm": 4.268946647644043, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.1143, + "grad_norm": 2.5581111907958984, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.1446, + "grad_norm": 4.272138595581055, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.1058, + "grad_norm": 1.8749103546142578, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1972, + "grad_norm": 4.553700923919678, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.1465, + "grad_norm": 4.258208751678467, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.1556, + "grad_norm": 2.6741788387298584, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.1074, + "grad_norm": 5.901241779327393, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1999, + "grad_norm": 2.886406421661377, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.163, + "grad_norm": 3.367415189743042, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.1678, + "grad_norm": 2.3446123600006104, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2442, + "grad_norm": 4.648331165313721, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9500978589057922, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.1314, + "grad_norm": 3.296555519104004, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.1224, + "grad_norm": 14.873774528503418, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1792, + "grad_norm": 2.493760108947754, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.1289, + "grad_norm": 4.287231922149658, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.1176, + "grad_norm": 12.776876449584961, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.1651, + "grad_norm": 2.691632032394409, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.271, + "grad_norm": 7.320021152496338, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.1183, + "grad_norm": 2.511960029602051, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.1387, + "grad_norm": 2.424102306365967, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.1443, + "grad_norm": 3.659524917602539, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.2176, + "grad_norm": 4.393547058105469, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.1576, + "grad_norm": 3.995103359222412, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0995, + "grad_norm": 7.335996627807617, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.1224, + "grad_norm": 2.3261799812316895, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1781, + "grad_norm": 3.084444761276245, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.1262, + "grad_norm": 2.499669075012207, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.1306, + "grad_norm": 2.529611587524414, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.1473, + "grad_norm": 2.308983325958252, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.1387, + "grad_norm": 2.9792327880859375, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.1256, + "grad_norm": 3.446150302886963, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.1884, + "grad_norm": 2.8107986450195312, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.1801, + "grad_norm": 2.476114511489868, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.1216, + "grad_norm": 2.8834075927734375, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.1391, + "grad_norm": 3.0233523845672607, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.1355, + "grad_norm": 3.540644645690918, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.1031, + "grad_norm": 2.104804515838623, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.1389, + "grad_norm": 2.2567386627197266, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.116, + "grad_norm": 2.4400763511657715, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.1294, + "grad_norm": 2.306941509246826, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.1189, + "grad_norm": 2.5862247943878174, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2484, + "grad_norm": 4.606533050537109, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.2119, + "grad_norm": 3.4597740173339844, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.1395, + "grad_norm": 3.5644280910491943, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.1167, + "grad_norm": 13.761821746826172, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.1423, + "grad_norm": 3.3145618438720703, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.131, + "grad_norm": 4.129085540771484, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.1337, + "grad_norm": 2.807199001312256, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.1235, + "grad_norm": 2.291154384613037, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.123, + "grad_norm": 3.186185836791992, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.13, + "grad_norm": 2.2184228897094727, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.1232, + "grad_norm": 2.6860218048095703, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1668, + "grad_norm": 2.615064859390259, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.1268, + "grad_norm": 3.520294427871704, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.1183, + "grad_norm": 3.490569829940796, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.1025, + "grad_norm": 12.270122528076172, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.1059, + "grad_norm": 2.1151371002197266, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.1021, + "grad_norm": 2.0290112495422363, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0993, + "grad_norm": 10.768261909484863, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.1187, + "grad_norm": 3.7776851654052734, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0929, + "grad_norm": 3.5349013805389404, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.1292, + "grad_norm": 4.221794605255127, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1597, + "grad_norm": 3.645026445388794, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.1281, + "grad_norm": 4.336436748504639, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.1427, + "grad_norm": 4.119178295135498, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.1959, + "grad_norm": 3.495059013366699, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.1062, + "grad_norm": 2.910947799682617, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1641, + "grad_norm": 1.9516125917434692, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.1267, + "grad_norm": 2.637050151824951, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.1602, + "grad_norm": 2.365922689437866, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.145, + "grad_norm": 3.577690362930298, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1917, + "grad_norm": 2.425001621246338, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.1295, + "grad_norm": 2.570420503616333, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.1216, + "grad_norm": 2.951737403869629, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.1172, + "grad_norm": 2.9054367542266846, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.1028, + "grad_norm": 11.967851638793945, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.1411, + "grad_norm": 3.018132448196411, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0953, + "grad_norm": 2.7196693420410156, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.1322, + "grad_norm": 3.49013090133667, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0793, + "grad_norm": 3.015738010406494, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.1429, + "grad_norm": 2.9223875999450684, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.1468, + "grad_norm": 3.956615924835205, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.1171, + "grad_norm": 4.619190216064453, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0767, + "grad_norm": 1.605452299118042, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.128, + "grad_norm": 4.304430961608887, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0781, + "grad_norm": 1.868319034576416, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.1311, + "grad_norm": 2.720447540283203, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.1312, + "grad_norm": 3.6773548126220703, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.164, + "grad_norm": 3.9428446292877197, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.1516, + "grad_norm": 2.488532781600952, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.076, + "grad_norm": 3.0369679927825928, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1552, + "grad_norm": 2.921428680419922, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0745, + "grad_norm": 4.530489921569824, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.1431, + "grad_norm": 2.894956350326538, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.1196, + "grad_norm": 2.8564133644104004, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.1022, + "grad_norm": 2.487640857696533, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0816, + "grad_norm": 9.081964492797852, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.0696, + "grad_norm": 5.340896129608154, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.1355, + "grad_norm": 2.5042786598205566, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.1177, + "grad_norm": 2.9676339626312256, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.1305, + "grad_norm": 2.792555570602417, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.1155, + "grad_norm": 3.074509620666504, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.1274, + "grad_norm": 3.4446146488189697, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0961, + "grad_norm": 4.31768798828125, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.1406, + "grad_norm": 3.5040206909179688, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.163, + "grad_norm": 3.973576307296753, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.1435, + "grad_norm": 2.7186615467071533, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.1024, + "grad_norm": 2.8186845779418945, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0781, + "grad_norm": 10.394554138183594, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0874, + "grad_norm": 10.657512664794922, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0946, + "grad_norm": 2.6607813835144043, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.1189, + "grad_norm": 2.2012691497802734, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.1313, + "grad_norm": 3.873806953430176, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0999, + "grad_norm": 1.8396018743515015, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.1057, + "grad_norm": 2.922558307647705, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0865, + "grad_norm": 2.5007052421569824, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.1029, + "grad_norm": 1.885617733001709, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0958, + "grad_norm": 1.7554020881652832, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.1244, + "grad_norm": 3.055809736251831, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.1059, + "grad_norm": 2.518828868865967, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0849, + "grad_norm": 4.157986640930176, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0949, + "grad_norm": 5.624795436859131, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.1133, + "grad_norm": 4.383209228515625, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0753, + "grad_norm": 10.447527885437012, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.0758, + "grad_norm": 2.0648767948150635, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.109, + "grad_norm": 2.311145782470703, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0993, + "grad_norm": 2.5646841526031494, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.061, + "grad_norm": 4.201132774353027, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.1403, + "grad_norm": 3.2465627193450928, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0917, + "grad_norm": 4.278575420379639, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.1363, + "grad_norm": 2.6477434635162354, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.1035, + "grad_norm": 2.616262435913086, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.1702, + "grad_norm": 2.8426945209503174, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0969, + "grad_norm": 2.934753179550171, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0628, + "grad_norm": 6.173173904418945, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.113, + "grad_norm": 2.183295249938965, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0674, + "grad_norm": 2.466468095779419, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0629, + "grad_norm": 6.685276508331299, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0606, + "grad_norm": 6.428196907043457, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0552, + "grad_norm": 3.2987399101257324, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.1492, + "grad_norm": 3.802187919616699, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0903, + "grad_norm": 3.23189115524292, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0758, + "grad_norm": 3.0735082626342773, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0978, + "grad_norm": 2.9236018657684326, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0489, + "grad_norm": 1.232297420501709, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0472, + "grad_norm": 1.1960967779159546, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.1622, + "grad_norm": 2.9212372303009033, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9550748467445374, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0964, + "grad_norm": 2.9365901947021484, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.1015, + "grad_norm": 3.297194719314575, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.108, + "grad_norm": 3.8434770107269287, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0869, + "grad_norm": 3.068513870239258, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0823, + "grad_norm": 2.382955312728882, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0952, + "grad_norm": 2.0796663761138916, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0904, + "grad_norm": 2.491260290145874, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0888, + "grad_norm": 1.8683680295944214, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0824, + "grad_norm": 2.5860776901245117, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0648, + "grad_norm": 10.482237815856934, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.1033, + "grad_norm": 1.8212071657180786, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.1275, + "grad_norm": 2.206996440887451, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.1174, + "grad_norm": 2.454157590866089, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.0846, + "grad_norm": 2.7483479976654053, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0712, + "grad_norm": 9.780473709106445, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0838, + "grad_norm": 2.227144718170166, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0996, + "grad_norm": 2.4927093982696533, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0723, + "grad_norm": 2.6736180782318115, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0765, + "grad_norm": 1.8901737928390503, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0661, + "grad_norm": 1.9803191423416138, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.06, + "grad_norm": 1.9032983779907227, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0437, + "grad_norm": 2.9226999282836914, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1345, + "grad_norm": 2.60559344291687, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.043, + "grad_norm": 3.43766713142395, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0881, + "grad_norm": 3.27600359916687, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0777, + "grad_norm": 3.8467905521392822, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.0971, + "grad_norm": 3.3157150745391846, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0769, + "grad_norm": 2.6883363723754883, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0381, + "grad_norm": 2.187551736831665, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0571, + "grad_norm": 1.9329798221588135, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0984, + "grad_norm": 2.6686573028564453, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0904, + "grad_norm": 2.7718393802642822, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0364, + "grad_norm": 3.612837314605713, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.1408, + "grad_norm": 2.518528461456299, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0875, + "grad_norm": 2.7795908451080322, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0644, + "grad_norm": 2.4260590076446533, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0884, + "grad_norm": 2.681588888168335, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.1001, + "grad_norm": 2.8202459812164307, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0774, + "grad_norm": 1.7170965671539307, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.069, + "grad_norm": 1.68620765209198, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0694, + "grad_norm": 2.236591339111328, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0943, + "grad_norm": 2.7542996406555176, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0578, + "grad_norm": 1.8813996315002441, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0911, + "grad_norm": 2.0993378162384033, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.107, + "grad_norm": 2.6184418201446533, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0803, + "grad_norm": 1.8751370906829834, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0774, + "grad_norm": 3.0198869705200195, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.2953, + "grad_norm": 14.372690200805664, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0943, + "grad_norm": 2.2585110664367676, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0432, + "grad_norm": 8.796082496643066, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.1307, + "grad_norm": 2.903687000274658, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.1348, + "grad_norm": 3.1296894550323486, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.1161, + "grad_norm": 2.436495542526245, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0368, + "grad_norm": 5.359442710876465, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.1177, + "grad_norm": 3.3482797145843506, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.1024, + "grad_norm": 3.229761838912964, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0988, + "grad_norm": 2.772888660430908, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0699, + "grad_norm": 2.91560435295105, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.1212, + "grad_norm": 3.1388144493103027, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0776, + "grad_norm": 2.409531831741333, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.0922, + "grad_norm": 2.301997423171997, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0382, + "grad_norm": 6.567748546600342, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0702, + "grad_norm": 2.9374635219573975, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0952, + "grad_norm": 2.805278778076172, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0809, + "grad_norm": 2.7832789421081543, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0967, + "grad_norm": 2.5809061527252197, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.1193, + "grad_norm": 4.146383285522461, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.0646, + "grad_norm": 2.3339507579803467, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0698, + "grad_norm": 2.154700756072998, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0861, + "grad_norm": 3.4389989376068115, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0744, + "grad_norm": 2.087575674057007, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.093, + "grad_norm": 2.7172322273254395, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0731, + "grad_norm": 2.2669014930725098, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.0747, + "grad_norm": 3.104933500289917, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.085, + "grad_norm": 2.475816249847412, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.1415, + "grad_norm": 3.2964231967926025, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0823, + "grad_norm": 1.5372464656829834, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.1085, + "grad_norm": 2.136002540588379, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0802, + "grad_norm": 2.1365489959716797, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0359, + "grad_norm": 7.951494216918945, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0344, + "grad_norm": 7.441174507141113, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0838, + "grad_norm": 2.689347505569458, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.1337, + "grad_norm": 4.8380937576293945, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.1259, + "grad_norm": 3.2358460426330566, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0269, + "grad_norm": 3.706432580947876, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0617, + "grad_norm": 2.4131107330322266, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.0225, + "grad_norm": 2.5498831272125244, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.1159, + "grad_norm": 2.7629480361938477, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0249, + "grad_norm": 2.194697380065918, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0852, + "grad_norm": 2.5653960704803467, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0783, + "grad_norm": 2.402456283569336, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.1104, + "grad_norm": 2.646005392074585, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0582, + "grad_norm": 2.135377883911133, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0242, + "grad_norm": 2.295201539993286, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0712, + "grad_norm": 2.529376745223999, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0697, + "grad_norm": 2.2107226848602295, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.1203, + "grad_norm": 2.456563711166382, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.091, + "grad_norm": 2.3880977630615234, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0641, + "grad_norm": 2.5870609283447266, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0678, + "grad_norm": 2.0148985385894775, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.0745, + "grad_norm": 2.9625463485717773, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0759, + "grad_norm": 2.3625717163085938, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0826, + "grad_norm": 3.747469902038574, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0772, + "grad_norm": 2.4018380641937256, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0834, + "grad_norm": 2.684398889541626, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.074, + "grad_norm": 2.106499671936035, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.0759, + "grad_norm": 2.1065762042999268, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.1232, + "grad_norm": 2.89585280418396, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0784, + "grad_norm": 2.267303943634033, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0591, + "grad_norm": 1.4712592363357544, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0626, + "grad_norm": 1.9069504737854004, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.1356, + "grad_norm": 3.2215309143066406, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0678, + "grad_norm": 2.080892562866211, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0643, + "grad_norm": 2.593749523162842, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.3105, + "grad_norm": 13.254192352294922, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0305, + "grad_norm": 7.083673000335693, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.0827, + "grad_norm": 1.9234445095062256, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.072, + "grad_norm": 1.6489096879959106, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0786, + "grad_norm": 2.5704004764556885, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.1092, + "grad_norm": 2.335846424102783, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.08, + "grad_norm": 1.7859958410263062, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0303, + "grad_norm": 6.245123386383057, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0248, + "grad_norm": 6.11707878112793, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0714, + "grad_norm": 2.122776985168457, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0583, + "grad_norm": 2.350274085998535, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0192, + "grad_norm": 3.1966686248779297, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.087, + "grad_norm": 2.123091459274292, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0536, + "grad_norm": 2.108837842941284, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0187, + "grad_norm": 2.225255012512207, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0689, + "grad_norm": 1.968031883239746, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0822, + "grad_norm": 2.5669515132904053, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0661, + "grad_norm": 2.156057596206665, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0545, + "grad_norm": 2.8333444595336914, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0889, + "grad_norm": 3.069793939590454, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0761, + "grad_norm": 1.9274708032608032, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.1089, + "grad_norm": 2.992846965789795, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.1287, + "grad_norm": 4.56328821182251, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.1186, + "grad_norm": 2.255676746368408, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0906, + "grad_norm": 1.8538860082626343, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2418, + "grad_norm": 11.443807601928711, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0399, + "grad_norm": 9.349817276000977, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.037, + "grad_norm": 9.234195709228516, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.1228, + "grad_norm": 2.415926456451416, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.0524, + "grad_norm": 2.570728063583374, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.086, + "grad_norm": 3.062072992324829, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0829, + "grad_norm": 2.552957534790039, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.1109, + "grad_norm": 2.1273176670074463, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0811, + "grad_norm": 2.13920259475708, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0689, + "grad_norm": 2.0192079544067383, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0726, + "grad_norm": 1.9012140035629272, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.075, + "grad_norm": 2.420971393585205, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0965, + "grad_norm": 1.7867904901504517, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0757, + "grad_norm": 2.5515830516815186, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0758, + "grad_norm": 2.5376474857330322, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0995, + "grad_norm": 1.8845465183258057, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.0824, + "grad_norm": 2.292940616607666, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0723, + "grad_norm": 2.140986919403076, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0714, + "grad_norm": 2.8790059089660645, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0623, + "grad_norm": 1.6493089199066162, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0657, + "grad_norm": 1.8830665349960327, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.029, + "grad_norm": 7.065803527832031, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0952, + "grad_norm": 2.2632198333740234, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0383, + "grad_norm": 8.098624229431152, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.023, + "grad_norm": 5.657382011413574, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0649, + "grad_norm": 1.4795526266098022, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.0737, + "grad_norm": 2.7369728088378906, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0637, + "grad_norm": 2.345536708831787, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0594, + "grad_norm": 2.2326128482818604, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.057, + "grad_norm": 3.0859591960906982, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0709, + "grad_norm": 2.870548963546753, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0772, + "grad_norm": 3.3536510467529297, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0163, + "grad_norm": 2.2633590698242188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0128, + "grad_norm": 1.1394838094711304, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0683, + "grad_norm": 2.8505446910858154, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0557, + "grad_norm": 2.6770808696746826, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0586, + "grad_norm": 3.0272936820983887, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0126, + "grad_norm": 0.8217504620552063, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0776, + "grad_norm": 4.100428581237793, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0689, + "grad_norm": 2.3711600303649902, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0797, + "grad_norm": 3.585756301879883, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0532, + "grad_norm": 2.134615421295166, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0974, + "grad_norm": 2.3772988319396973, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.1153, + "grad_norm": 2.4541940689086914, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.048, + "grad_norm": 1.6060377359390259, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0451, + "grad_norm": 2.1678755283355713, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0748, + "grad_norm": 2.047844409942627, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0824, + "grad_norm": 2.762352705001831, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.1146, + "grad_norm": 3.0128841400146484, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.0711, + "grad_norm": 2.0650486946105957, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0334, + "grad_norm": 7.7052412033081055, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0709, + "grad_norm": 1.5119361877441406, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0308, + "grad_norm": 7.3754143714904785, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.0995, + "grad_norm": 2.8331611156463623, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.0562, + "grad_norm": 3.423184871673584, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0659, + "grad_norm": 1.857692003250122, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2618, + "grad_norm": 11.681804656982422, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9351081252098083, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.0791, + "grad_norm": 2.311647415161133, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0486, + "grad_norm": 2.8530430793762207, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.1104, + "grad_norm": 2.617987871170044, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.0195, + "grad_norm": 4.978179931640625, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0726, + "grad_norm": 2.0882959365844727, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0754, + "grad_norm": 2.1230452060699463, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0707, + "grad_norm": 2.2002744674682617, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.0494, + "grad_norm": 1.7500207424163818, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0811, + "grad_norm": 1.8128851652145386, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0756, + "grad_norm": 2.397252082824707, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0501, + "grad_norm": 1.975466012954712, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.1087, + "grad_norm": 2.2733750343322754, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.1041, + "grad_norm": 2.3084492683410645, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0496, + "grad_norm": 2.098421096801758, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0626, + "grad_norm": 2.004920482635498, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0667, + "grad_norm": 1.603124737739563, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0829, + "grad_norm": 2.5960142612457275, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0234, + "grad_norm": 5.8595757484436035, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.1032, + "grad_norm": 1.7731209993362427, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0228, + "grad_norm": 6.049434185028076, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0828, + "grad_norm": 1.9529765844345093, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0718, + "grad_norm": 1.3272991180419922, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0907, + "grad_norm": 2.2710683345794678, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.2171, + "grad_norm": 6.965005397796631, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.0657, + "grad_norm": 2.213243007659912, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1745, + "grad_norm": 6.300892353057861, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.06, + "grad_norm": 2.4582417011260986, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0516, + "grad_norm": 1.6709243059158325, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.1051, + "grad_norm": 2.654740810394287, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.072, + "grad_norm": 2.0503504276275635, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.0742, + "grad_norm": 1.800299882888794, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0737, + "grad_norm": 2.063502788543701, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.1061, + "grad_norm": 2.698178291320801, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0737, + "grad_norm": 2.0112061500549316, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0195, + "grad_norm": 5.365294933319092, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0601, + "grad_norm": 1.5453028678894043, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.2441, + "grad_norm": 10.393324851989746, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.1079, + "grad_norm": 2.6032726764678955, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.0639, + "grad_norm": 2.6428260803222656, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0632, + "grad_norm": 1.3782398700714111, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0189, + "grad_norm": 4.952188014984131, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0613, + "grad_norm": 1.8376456499099731, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0539, + "grad_norm": 1.6092228889465332, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0151, + "grad_norm": 3.721954345703125, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0168, + "grad_norm": 3.578442096710205, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0494, + "grad_norm": 1.714572787284851, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0715, + "grad_norm": 2.152249813079834, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0106, + "grad_norm": 1.2338261604309082, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0948, + "grad_norm": 3.4057295322418213, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0967, + "grad_norm": 2.297558546066284, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0715, + "grad_norm": 2.948807716369629, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0691, + "grad_norm": 2.480257749557495, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2602, + "grad_norm": 9.955911636352539, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0623, + "grad_norm": 2.92844295501709, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0922, + "grad_norm": 2.3774516582489014, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0664, + "grad_norm": 1.5494801998138428, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1929, + "grad_norm": 6.599433422088623, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9450914859771729, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.02, + "grad_norm": 5.4353718757629395, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0603, + "grad_norm": 1.707094669342041, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0722, + "grad_norm": 2.148479461669922, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0717, + "grad_norm": 2.687295436859131, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0695, + "grad_norm": 2.940627098083496, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.0195, + "grad_norm": 5.349563121795654, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0931, + "grad_norm": 1.7995429039001465, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0175, + "grad_norm": 5.07689094543457, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0159, + "grad_norm": 4.247437000274658, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0783, + "grad_norm": 2.34236216545105, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.113, + "grad_norm": 2.772456407546997, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0621, + "grad_norm": 2.3582286834716797, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0522, + "grad_norm": 3.014678716659546, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0758, + "grad_norm": 2.709341049194336, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0718, + "grad_norm": 2.3536617755889893, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0789, + "grad_norm": 3.258106231689453, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0763, + "grad_norm": 2.218254804611206, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0599, + "grad_norm": 2.2704806327819824, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0126, + "grad_norm": 2.4626388549804688, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0669, + "grad_norm": 2.0617358684539795, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.066, + "grad_norm": 2.0766263008117676, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0618, + "grad_norm": 1.5771903991699219, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0687, + "grad_norm": 1.789569616317749, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0157, + "grad_norm": 4.058000087738037, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0389, + "grad_norm": 1.5074262619018555, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0663, + "grad_norm": 2.1943564414978027, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0734, + "grad_norm": 2.0293729305267334, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0734, + "grad_norm": 1.9577043056488037, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0729, + "grad_norm": 2.053274154663086, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.1016, + "grad_norm": 4.023435115814209, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0618, + "grad_norm": 2.152527093887329, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0633, + "grad_norm": 2.2773494720458984, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0207, + "grad_norm": 5.423501491546631, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0651, + "grad_norm": 1.2856030464172363, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0628, + "grad_norm": 1.8682835102081299, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.0192, + "grad_norm": 4.855226516723633, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0757, + "grad_norm": 1.910493016242981, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.0778, + "grad_norm": 3.503009796142578, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.05, + "grad_norm": 1.867902398109436, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0145, + "grad_norm": 3.8562870025634766, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0668, + "grad_norm": 1.7752705812454224, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0735, + "grad_norm": 2.393582582473755, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0985, + "grad_norm": 2.7950665950775146, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0681, + "grad_norm": 2.1131601333618164, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0515, + "grad_norm": 2.2755846977233887, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0434, + "grad_norm": 1.569434642791748, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.1047, + "grad_norm": 3.0928077697753906, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0575, + "grad_norm": 2.008404016494751, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0579, + "grad_norm": 1.4861952066421509, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.069, + "grad_norm": 1.9950709342956543, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0155, + "grad_norm": 4.394257068634033, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0969, + "grad_norm": 2.6770575046539307, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0712, + "grad_norm": 2.319610595703125, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0689, + "grad_norm": 1.8970541954040527, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0899, + "grad_norm": 1.8339478969573975, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.1032, + "grad_norm": 2.781162977218628, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0604, + "grad_norm": 2.540081024169922, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0491, + "grad_norm": 1.9644439220428467, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0802, + "grad_norm": 1.8939117193222046, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0681, + "grad_norm": 2.0177180767059326, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.0476, + "grad_norm": 1.9407687187194824, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0188, + "grad_norm": 5.371039390563965, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0508, + "grad_norm": 1.873732566833496, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0237, + "grad_norm": 6.1496429443359375, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.099, + "grad_norm": 4.506502151489258, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.1, + "grad_norm": 5.314243316650391, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0123, + "grad_norm": 3.1825995445251465, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0132, + "grad_norm": 3.1502106189727783, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0622, + "grad_norm": 2.719097375869751, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0992, + "grad_norm": 3.1199769973754883, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.066, + "grad_norm": 2.5837504863739014, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0542, + "grad_norm": 2.4771666526794434, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0937, + "grad_norm": 3.6200714111328125, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.0674, + "grad_norm": 2.399535655975342, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0678, + "grad_norm": 2.516605854034424, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0668, + "grad_norm": 2.5172040462493896, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0744, + "grad_norm": 2.4523816108703613, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.1019, + "grad_norm": 3.3321380615234375, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0837, + "grad_norm": 1.8811334371566772, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0531, + "grad_norm": 1.9141852855682373, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0408, + "grad_norm": 1.487582802772522, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0218, + "grad_norm": 5.286271095275879, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0628, + "grad_norm": 1.7239201068878174, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0625, + "grad_norm": 1.7386255264282227, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0405, + "grad_norm": 1.4104888439178467, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0226, + "grad_norm": 4.608585834503174, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0968, + "grad_norm": 2.3830323219299316, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0739, + "grad_norm": 1.8739683628082275, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.058, + "grad_norm": 2.673945665359497, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0943, + "grad_norm": 3.0288586616516113, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0726, + "grad_norm": 2.270813465118408, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0589, + "grad_norm": 1.880444049835205, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0143, + "grad_norm": 3.3361847400665283, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.059, + "grad_norm": 1.848816990852356, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0714, + "grad_norm": 2.0221500396728516, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0668, + "grad_norm": 4.154532432556152, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0617, + "grad_norm": 1.9648317098617554, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0652, + "grad_norm": 2.866431474685669, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0459, + "grad_norm": 2.3324079513549805, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0111, + "grad_norm": 2.3991503715515137, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0654, + "grad_norm": 1.9646960496902466, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0798, + "grad_norm": 2.720228433609009, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0974, + "grad_norm": 2.5758628845214844, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0621, + "grad_norm": 2.303436517715454, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0944, + "grad_norm": 2.617363929748535, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0571, + "grad_norm": 1.898218035697937, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0136, + "grad_norm": 3.2630972862243652, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.0482, + "grad_norm": 2.0208237171173096, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0486, + "grad_norm": 1.8037229776382446, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0118, + "grad_norm": 2.722412586212158, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0687, + "grad_norm": 2.6608150005340576, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0101, + "grad_norm": 1.664276361465454, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0609, + "grad_norm": 2.5043087005615234, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0685, + "grad_norm": 2.0320653915405273, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0709, + "grad_norm": 2.7590584754943848, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0511, + "grad_norm": 2.424579620361328, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.061, + "grad_norm": 1.826949119567871, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0086, + "grad_norm": 1.5401605367660522, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0667, + "grad_norm": 2.49796724319458, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0741, + "grad_norm": 2.141827344894409, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0662, + "grad_norm": 2.1507174968719482, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0596, + "grad_norm": 1.928731083869934, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0469, + "grad_norm": 2.391432523727417, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0121, + "grad_norm": 2.9941039085388184, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0452, + "grad_norm": 2.110806465148926, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0624, + "grad_norm": 1.8115919828414917, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0456, + "grad_norm": 1.548567533493042, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0565, + "grad_norm": 1.9886720180511475, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0457, + "grad_norm": 1.8589720726013184, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.041, + "grad_norm": 1.6640335321426392, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0712, + "grad_norm": 2.0171613693237305, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0628, + "grad_norm": 1.6715848445892334, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0416, + "grad_norm": 2.1554946899414062, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0737, + "grad_norm": 2.242116689682007, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.0177, + "grad_norm": 4.810120105743408, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.0649, + "grad_norm": 1.675683617591858, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0727, + "grad_norm": 2.5127744674682617, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0587, + "grad_norm": 2.14599871635437, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.1132, + "grad_norm": 2.5991926193237305, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0786, + "grad_norm": 2.0661518573760986, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0686, + "grad_norm": 1.411996841430664, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0886, + "grad_norm": 1.8908826112747192, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0795, + "grad_norm": 1.8596928119659424, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.064, + "grad_norm": 2.0051939487457275, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0761, + "grad_norm": 1.7486968040466309, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0519, + "grad_norm": 1.7253214120864868, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0688, + "grad_norm": 1.7860913276672363, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0287, + "grad_norm": 6.397044658660889, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0877, + "grad_norm": 1.6188372373580933, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0595, + "grad_norm": 1.6029514074325562, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.2163, + "grad_norm": 8.956819534301758, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0666, + "grad_norm": 1.4872380495071411, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.092, + "grad_norm": 3.029266595840454, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0757, + "grad_norm": 1.899221658706665, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.0666, + "grad_norm": 1.577907681465149, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.0581, + "grad_norm": 1.467238426208496, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1923, + "grad_norm": 8.706313133239746, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.062, + "grad_norm": 2.0428693294525146, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0775, + "grad_norm": 2.0258123874664307, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0661, + "grad_norm": 1.7304749488830566, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0547, + "grad_norm": 1.6691105365753174, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0617, + "grad_norm": 1.681009292602539, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0544, + "grad_norm": 1.8074179887771606, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0396, + "grad_norm": 1.812711477279663, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0577, + "grad_norm": 2.0831782817840576, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0776, + "grad_norm": 1.3640745878219604, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0454, + "grad_norm": 1.9006543159484863, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0633, + "grad_norm": 1.6996928453445435, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.0738, + "grad_norm": 1.9721561670303345, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0439, + "grad_norm": 2.2615768909454346, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0237, + "grad_norm": 5.635776519775391, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.094, + "grad_norm": 2.4352505207061768, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0648, + "grad_norm": 1.6868159770965576, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0652, + "grad_norm": 2.1479756832122803, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0597, + "grad_norm": 2.0000855922698975, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0643, + "grad_norm": 2.511259078979492, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0161, + "grad_norm": 3.99651837348938, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0649, + "grad_norm": 2.231045722961426, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0386, + "grad_norm": 1.9224427938461304, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0673, + "grad_norm": 2.328557014465332, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0642, + "grad_norm": 2.1176366806030273, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0643, + "grad_norm": 2.319209098815918, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0126, + "grad_norm": 2.7921886444091797, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.056, + "grad_norm": 1.6485341787338257, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0559, + "grad_norm": 1.85313081741333, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0718, + "grad_norm": 2.0347867012023926, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0611, + "grad_norm": 2.6210453510284424, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0428, + "grad_norm": 2.1774537563323975, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.0564, + "grad_norm": 1.4708741903305054, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0461, + "grad_norm": 2.133490562438965, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0654, + "grad_norm": 1.8513908386230469, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0467, + "grad_norm": 2.651682138442993, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0496, + "grad_norm": 1.6719735860824585, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.064, + "grad_norm": 1.7016679048538208, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0601, + "grad_norm": 1.5496330261230469, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.0185, + "grad_norm": 4.8348541259765625, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.0205, + "grad_norm": 5.356715202331543, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.065, + "grad_norm": 2.8306968212127686, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.048, + "grad_norm": 1.684121012687683, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0611, + "grad_norm": 1.78119957447052, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.069, + "grad_norm": 2.2316365242004395, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0779, + "grad_norm": 2.183338165283203, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0642, + "grad_norm": 1.943967580795288, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0415, + "grad_norm": 1.6110951900482178, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0117, + "grad_norm": 3.0185630321502686, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0992, + "grad_norm": 3.14607310295105, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.047, + "grad_norm": 1.2475289106369019, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0819, + "grad_norm": 2.5398612022399902, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0555, + "grad_norm": 1.682294249534607, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0867, + "grad_norm": 2.457875967025757, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0667, + "grad_norm": 1.7135660648345947, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0378, + "grad_norm": 1.4605510234832764, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0612, + "grad_norm": 3.01509690284729, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0623, + "grad_norm": 2.2433955669403076, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0192, + "grad_norm": 5.402326583862305, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.099, + "grad_norm": 4.552786827087402, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0569, + "grad_norm": 2.1845462322235107, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.063, + "grad_norm": 2.7287683486938477, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0426, + "grad_norm": 2.1356048583984375, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0626, + "grad_norm": 2.1982219219207764, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0881, + "grad_norm": 2.790822982788086, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0872, + "grad_norm": 2.464653968811035, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0144, + "grad_norm": 3.807983636856079, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0594, + "grad_norm": 1.6763768196105957, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0882, + "grad_norm": 1.924737811088562, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0488, + "grad_norm": 2.331883430480957, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.088, + "grad_norm": 2.7460174560546875, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0446, + "grad_norm": 1.7645024061203003, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0806, + "grad_norm": 1.7870028018951416, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0602, + "grad_norm": 1.6170544624328613, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0427, + "grad_norm": 2.0376412868499756, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0636, + "grad_norm": 2.1391189098358154, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0127, + "grad_norm": 3.4139318466186523, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0532, + "grad_norm": 2.2980690002441406, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.042, + "grad_norm": 1.7804741859436035, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.039, + "grad_norm": 1.5417966842651367, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0691, + "grad_norm": 1.9181416034698486, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0105, + "grad_norm": 2.567687511444092, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0513, + "grad_norm": 2.1507062911987305, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0661, + "grad_norm": 2.6471474170684814, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0528, + "grad_norm": 1.6081326007843018, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0148, + "grad_norm": 3.6129963397979736, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0589, + "grad_norm": 1.6536871194839478, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0893, + "grad_norm": 2.1024138927459717, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0628, + "grad_norm": 1.6858649253845215, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0532, + "grad_norm": 1.6352399587631226, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0673, + "grad_norm": 1.62017822265625, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0577, + "grad_norm": 1.5879229307174683, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0148, + "grad_norm": 4.010829925537109, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0147, + "grad_norm": 4.00789213180542, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.015, + "grad_norm": 4.107461929321289, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0458, + "grad_norm": 2.3218655586242676, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.0119, + "grad_norm": 2.9490623474121094, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.0367, + "grad_norm": 1.8217196464538574, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0079, + "grad_norm": 1.3022953271865845, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0724, + "grad_norm": 2.17926287651062, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.039, + "grad_norm": 1.739366888999939, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.0534, + "grad_norm": 2.180590867996216, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0063, + "grad_norm": 0.5163084864616394, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0584, + "grad_norm": 2.8058063983917236, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0582, + "grad_norm": 2.005493640899658, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0497, + "grad_norm": 2.923448324203491, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.006, + "grad_norm": 0.48110926151275635, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0704, + "grad_norm": 2.408653497695923, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0878, + "grad_norm": 2.767408847808838, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0599, + "grad_norm": 1.9640824794769287, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0674, + "grad_norm": 2.939439535140991, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0866, + "grad_norm": 2.223776340484619, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0819, + "grad_norm": 1.7831770181655884, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0552, + "grad_norm": 1.528134822845459, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.0105, + "grad_norm": 2.722768783569336, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0559, + "grad_norm": 1.601446509361267, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0571, + "grad_norm": 1.6370468139648438, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0611, + "grad_norm": 1.7496470212936401, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.0582, + "grad_norm": 1.8051985502243042, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0527, + "grad_norm": 1.1893869638442993, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0613, + "grad_norm": 1.7861930131912231, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0771, + "grad_norm": 1.6442121267318726, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0614, + "grad_norm": 1.7604858875274658, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0686, + "grad_norm": 1.7211897373199463, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0851, + "grad_norm": 2.2072157859802246, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0234, + "grad_norm": 6.049727916717529, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0462, + "grad_norm": 2.178677558898926, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0866, + "grad_norm": 2.1971359252929688, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0701, + "grad_norm": 2.604931116104126, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1403, + "grad_norm": 4.8585004806518555, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0418, + "grad_norm": 2.0918304920196533, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0607, + "grad_norm": 1.5581291913986206, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.0464, + "grad_norm": 2.2121376991271973, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0187, + "grad_norm": 5.02223539352417, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.051, + "grad_norm": 1.1968108415603638, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0379, + "grad_norm": 1.5838263034820557, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0599, + "grad_norm": 2.1656548976898193, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0531, + "grad_norm": 1.5780129432678223, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0101, + "grad_norm": 2.5371878147125244, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0635, + "grad_norm": 1.7947604656219482, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0522, + "grad_norm": 2.101656436920166, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0803, + "grad_norm": 1.9881861209869385, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0618, + "grad_norm": 1.884840965270996, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0554, + "grad_norm": 1.8216484785079956, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0631, + "grad_norm": 2.1785407066345215, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0409, + "grad_norm": 1.5896263122558594, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1964, + "grad_norm": 6.368833541870117, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0087, + "grad_norm": 1.9522284269332886, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.2323, + "grad_norm": 7.9943718910217285, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0801, + "grad_norm": 1.92306387424469, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.045, + "grad_norm": 1.3462337255477905, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0721, + "grad_norm": 2.416792869567871, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0406, + "grad_norm": 2.1178133487701416, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0559, + "grad_norm": 1.5205347537994385, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 1.617630124092102, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0438, + "grad_norm": 2.34078049659729, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0753, + "grad_norm": 1.8780885934829712, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.147, + "grad_norm": 5.077685356140137, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.0469, + "grad_norm": 1.9634060859680176, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0662, + "grad_norm": 1.4567596912384033, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0167, + "grad_norm": 4.722336292266846, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0388, + "grad_norm": 2.1787490844726562, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0508, + "grad_norm": 1.4540494680404663, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0463, + "grad_norm": 1.9126884937286377, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0413, + "grad_norm": 1.3725852966308594, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0406, + "grad_norm": 1.769464373588562, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.0157, + "grad_norm": 4.246346473693848, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1541, + "grad_norm": 4.8993754386901855, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.041, + "grad_norm": 1.7246980667114258, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.0726, + "grad_norm": 2.2514991760253906, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0097, + "grad_norm": 2.538367509841919, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.083, + "grad_norm": 2.2139499187469482, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.0086, + "grad_norm": 2.0688657760620117, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0579, + "grad_norm": 1.7580430507659912, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0071, + "grad_norm": 1.2317492961883545, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.0547, + "grad_norm": 1.7383458614349365, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0493, + "grad_norm": 1.9442108869552612, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0743, + "grad_norm": 2.8182926177978516, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0058, + "grad_norm": 0.5721865296363831, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0615, + "grad_norm": 2.226674795150757, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0063, + "grad_norm": 0.8222597241401672, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0679, + "grad_norm": 2.1432037353515625, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.0604, + "grad_norm": 2.196251392364502, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0067, + "grad_norm": 0.9334397912025452, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0877, + "grad_norm": 2.9189441204071045, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.04, + "grad_norm": 1.8555492162704468, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0433, + "grad_norm": 2.1462485790252686, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0912, + "grad_norm": 2.674384593963623, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0806, + "grad_norm": 2.1967833042144775, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0397, + "grad_norm": 1.576885461807251, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0385, + "grad_norm": 1.8607549667358398, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0591, + "grad_norm": 2.075608491897583, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0072, + "grad_norm": 1.595956563949585, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.0107, + "grad_norm": 2.7350447177886963, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0675, + "grad_norm": 1.7995527982711792, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0655, + "grad_norm": 2.3666279315948486, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0898, + "grad_norm": 2.2464659214019775, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.0555, + "grad_norm": 2.4049134254455566, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0835, + "grad_norm": 2.0087289810180664, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0679, + "grad_norm": 2.1180970668792725, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0605, + "grad_norm": 1.7271490097045898, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0381, + "grad_norm": 2.031334400177002, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.0639, + "grad_norm": 1.7528166770935059, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.1307, + "grad_norm": 3.783503293991089, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0473, + "grad_norm": 2.779741048812866, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0455, + "grad_norm": 1.9504565000534058, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0662, + "grad_norm": 2.2791426181793213, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0857, + "grad_norm": 2.4661900997161865, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0817, + "grad_norm": 2.018150568008423, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0491, + "grad_norm": 1.4105336666107178, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0705, + "grad_norm": 1.7099734544754028, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.0197, + "grad_norm": 5.4979472160339355, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0515, + "grad_norm": 1.9852694272994995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0435, + "grad_norm": 1.3928176164627075, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.062, + "grad_norm": 2.7774510383605957, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.053, + "grad_norm": 0.9669445753097534, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0178, + "grad_norm": 4.694067478179932, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0133, + "grad_norm": 3.8942577838897705, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.042, + "grad_norm": 1.4630885124206543, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0598, + "grad_norm": 1.6373014450073242, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0454, + "grad_norm": 1.9768292903900146, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0734, + "grad_norm": 1.4859123229980469, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0647, + "grad_norm": 1.7751868963241577, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0643, + "grad_norm": 1.6454154253005981, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0511, + "grad_norm": 1.9402817487716675, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.047, + "grad_norm": 1.6513389348983765, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0107, + "grad_norm": 2.9602744579315186, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0708, + "grad_norm": 1.9953235387802124, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0562, + "grad_norm": 1.7549750804901123, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0589, + "grad_norm": 2.0597615242004395, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0469, + "grad_norm": 1.7559466361999512, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0757, + "grad_norm": 2.0765254497528076, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0118, + "grad_norm": 3.379472017288208, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0692, + "grad_norm": 1.6905264854431152, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.0493, + "grad_norm": 2.3974990844726562, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0533, + "grad_norm": 1.609572410583496, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0727, + "grad_norm": 2.563096523284912, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0556, + "grad_norm": 2.0002143383026123, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0487, + "grad_norm": 1.7846338748931885, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0802, + "grad_norm": 2.2537660598754883, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0584, + "grad_norm": 3.043835163116455, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.012, + "grad_norm": 3.2526142597198486, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.063, + "grad_norm": 1.3797202110290527, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0658, + "grad_norm": 2.5818750858306885, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0108, + "grad_norm": 3.089911699295044, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0781, + "grad_norm": 2.348559856414795, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.053, + "grad_norm": 1.6293948888778687, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0541, + "grad_norm": 1.7948721647262573, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0408, + "grad_norm": 2.3477344512939453, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0579, + "grad_norm": 2.6738388538360596, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.055, + "grad_norm": 1.522643804550171, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0634, + "grad_norm": 1.585366129875183, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0616, + "grad_norm": 1.645047664642334, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0757, + "grad_norm": 1.689460039138794, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0454, + "grad_norm": 2.0291545391082764, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0104, + "grad_norm": 3.0368359088897705, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0097, + "grad_norm": 2.792633533477783, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0776, + "grad_norm": 2.638593912124634, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0612, + "grad_norm": 2.7605133056640625, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0884, + "grad_norm": 2.6775927543640137, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0752, + "grad_norm": 1.9850537776947021, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0439, + "grad_norm": 1.5452102422714233, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0435, + "grad_norm": 2.2355833053588867, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.0532, + "grad_norm": 1.7478253841400146, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0106, + "grad_norm": 3.0870492458343506, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0534, + "grad_norm": 1.8180068731307983, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.0088, + "grad_norm": 2.428753137588501, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0094, + "grad_norm": 2.480687141418457, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.056, + "grad_norm": 1.977836012840271, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0576, + "grad_norm": 2.694723129272461, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.0559, + "grad_norm": 1.785524606704712, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0548, + "grad_norm": 1.7176051139831543, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.07, + "grad_norm": 1.961999773979187, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0592, + "grad_norm": 2.465545654296875, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0378, + "grad_norm": 1.4544801712036133, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0602, + "grad_norm": 1.772146224975586, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.04, + "grad_norm": 2.1550979614257812, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0448, + "grad_norm": 2.0862441062927246, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.073, + "grad_norm": 1.8445123434066772, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0701, + "grad_norm": 1.734731912612915, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0621, + "grad_norm": 2.5419921875, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0387, + "grad_norm": 2.232482671737671, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.041, + "grad_norm": 2.1068978309631348, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0677, + "grad_norm": 1.7934560775756836, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0866, + "grad_norm": 2.3774123191833496, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0188, + "grad_norm": 5.182284832000732, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0517, + "grad_norm": 1.6540446281433105, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0801, + "grad_norm": 1.7044258117675781, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.018, + "grad_norm": 4.825031757354736, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0579, + "grad_norm": 1.9127049446105957, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0387, + "grad_norm": 1.524353265762329, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0743, + "grad_norm": 1.8598476648330688, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0364, + "grad_norm": 1.6264195442199707, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0746, + "grad_norm": 1.4887213706970215, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0117, + "grad_norm": 3.425563335418701, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.0552, + "grad_norm": 1.6610738039016724, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0105, + "grad_norm": 2.9016385078430176, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0657, + "grad_norm": 2.349597215652466, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0706, + "grad_norm": 1.7171733379364014, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.0076, + "grad_norm": 2.070596933364868, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.082, + "grad_norm": 2.476560115814209, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0696, + "grad_norm": 2.013134002685547, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.0456, + "grad_norm": 2.0719385147094727, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0789, + "grad_norm": 2.737678289413452, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0755, + "grad_norm": 2.932962417602539, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0621, + "grad_norm": 1.5760010480880737, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.145, + "grad_norm": 4.413599491119385, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9540117383003235, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.052, + "grad_norm": 1.3965295553207397, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0507, + "grad_norm": 1.5652461051940918, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1608, + "grad_norm": 5.22923469543457, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.04, + "grad_norm": 2.1607284545898438, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0093, + "grad_norm": 2.755345106124878, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0403, + "grad_norm": 1.6918083429336548, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0569, + "grad_norm": 1.4805766344070435, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0639, + "grad_norm": 1.9898265600204468, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0764, + "grad_norm": 2.4644553661346436, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0458, + "grad_norm": 1.6111081838607788, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0439, + "grad_norm": 1.847048282623291, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0485, + "grad_norm": 2.2336626052856445, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0204, + "grad_norm": 5.058897972106934, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.059, + "grad_norm": 1.464397668838501, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0663, + "grad_norm": 1.986909031867981, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0553, + "grad_norm": 1.3948322534561157, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0762, + "grad_norm": 1.8114221096038818, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.0596, + "grad_norm": 1.3451945781707764, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.066, + "grad_norm": 1.6588683128356934, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0486, + "grad_norm": 1.8605456352233887, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0567, + "grad_norm": 1.8595200777053833, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0651, + "grad_norm": 1.3704520463943481, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0776, + "grad_norm": 1.5874192714691162, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0584, + "grad_norm": 1.6083050966262817, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0526, + "grad_norm": 2.637402296066284, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0434, + "grad_norm": 1.125180721282959, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.0604, + "grad_norm": 1.9658552408218384, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0609, + "grad_norm": 2.3239123821258545, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0822, + "grad_norm": 2.9983248710632324, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.062, + "grad_norm": 1.7106144428253174, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0542, + "grad_norm": 1.9297690391540527, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0174, + "grad_norm": 4.6414361000061035, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0755, + "grad_norm": 2.1787867546081543, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.015, + "grad_norm": 4.113848686218262, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0492, + "grad_norm": 1.3803060054779053, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0512, + "grad_norm": 1.5045576095581055, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0608, + "grad_norm": 1.5915031433105469, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0583, + "grad_norm": 1.2304151058197021, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0563, + "grad_norm": 1.7730633020401, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0684, + "grad_norm": 1.730749249458313, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.052, + "grad_norm": 1.6816562414169312, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0732, + "grad_norm": 2.309110164642334, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0634, + "grad_norm": 1.8224540948867798, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0584, + "grad_norm": 1.9186445474624634, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0348, + "grad_norm": 1.3239874839782715, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0938, + "grad_norm": 2.3451895713806152, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0623, + "grad_norm": 1.8779281377792358, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.167, + "grad_norm": 4.993703842163086, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0142, + "grad_norm": 4.2328338623046875, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0792, + "grad_norm": 2.0863592624664307, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.044, + "grad_norm": 2.3412485122680664, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0404, + "grad_norm": 1.4804179668426514, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0168, + "grad_norm": 4.645394802093506, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0718, + "grad_norm": 1.6375811100006104, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.06, + "grad_norm": 1.5656460523605347, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.065, + "grad_norm": 1.7190107107162476, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0152, + "grad_norm": 3.9972171783447266, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0679, + "grad_norm": 2.4974441528320312, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.0582, + "grad_norm": 2.3485262393951416, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0829, + "grad_norm": 2.598663091659546, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.01, + "grad_norm": 2.8793528079986572, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0661, + "grad_norm": 1.9478849172592163, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0715, + "grad_norm": 1.916156530380249, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0601, + "grad_norm": 1.6466504335403442, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.01, + "grad_norm": 2.8242533206939697, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0409, + "grad_norm": 1.506545066833496, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.0809, + "grad_norm": 1.7198259830474854, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.1451, + "grad_norm": 4.725864887237549, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0649, + "grad_norm": 1.4829907417297363, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0779, + "grad_norm": 1.798589825630188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0645, + "grad_norm": 2.8309855461120605, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0573, + "grad_norm": 2.2329795360565186, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0633, + "grad_norm": 1.7102524042129517, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0533, + "grad_norm": 1.8966953754425049, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.1242, + "grad_norm": 3.5069096088409424, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0668, + "grad_norm": 1.6451408863067627, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0168, + "grad_norm": 4.646505355834961, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0122, + "grad_norm": 3.5036394596099854, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.054, + "grad_norm": 1.476265788078308, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0771, + "grad_norm": 2.343313455581665, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.041, + "grad_norm": 1.5659995079040527, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0377, + "grad_norm": 1.196007251739502, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.1297, + "grad_norm": 3.8112542629241943, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0526, + "grad_norm": 1.3368208408355713, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0444, + "grad_norm": 1.8093925714492798, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0101, + "grad_norm": 2.882591485977173, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0437, + "grad_norm": 1.7717807292938232, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0546, + "grad_norm": 2.2301149368286133, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0102, + "grad_norm": 2.8497674465179443, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.059, + "grad_norm": 1.9033845663070679, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0431, + "grad_norm": 1.6551549434661865, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0585, + "grad_norm": 1.5250738859176636, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0576, + "grad_norm": 1.7390161752700806, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.0642, + "grad_norm": 2.0047788619995117, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0409, + "grad_norm": 1.696035385131836, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0577, + "grad_norm": 1.9078930616378784, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0098, + "grad_norm": 2.792039155960083, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.0582, + "grad_norm": 1.8414034843444824, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0545, + "grad_norm": 2.1793394088745117, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.0449, + "grad_norm": 2.220048666000366, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0545, + "grad_norm": 1.9344781637191772, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0567, + "grad_norm": 1.8442058563232422, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0118, + "grad_norm": 3.14497971534729, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0721, + "grad_norm": 2.7254114151000977, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0587, + "grad_norm": 1.436458945274353, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.1323, + "grad_norm": 3.204223871231079, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0704, + "grad_norm": 1.601090431213379, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0601, + "grad_norm": 1.5754057168960571, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0711, + "grad_norm": 1.8766717910766602, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.059, + "grad_norm": 2.119466781616211, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0772, + "grad_norm": 1.8192287683486938, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0588, + "grad_norm": 1.6275320053100586, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0417, + "grad_norm": 2.3129870891571045, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0444, + "grad_norm": 1.6177237033843994, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0566, + "grad_norm": 2.093630075454712, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0655, + "grad_norm": 1.9267455339431763, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0442, + "grad_norm": 1.0200287103652954, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0638, + "grad_norm": 1.3187520503997803, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.0364, + "grad_norm": 1.6464682817459106, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0775, + "grad_norm": 2.474910020828247, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.0621, + "grad_norm": 1.1011793613433838, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0218, + "grad_norm": 5.168939113616943, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0221, + "grad_norm": 5.572858810424805, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0561, + "grad_norm": 1.8146536350250244, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0804, + "grad_norm": 3.2232189178466797, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.039, + "grad_norm": 1.8940805196762085, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0584, + "grad_norm": 2.0325937271118164, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0422, + "grad_norm": 1.980771541595459, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0593, + "grad_norm": 1.710123896598816, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0592, + "grad_norm": 2.430305004119873, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0467, + "grad_norm": 2.204895496368408, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0496, + "grad_norm": 1.7684513330459595, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.0462, + "grad_norm": 1.7807819843292236, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.08, + "grad_norm": 1.9608607292175293, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0588, + "grad_norm": 1.6851762533187866, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0448, + "grad_norm": 1.395566701889038, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0771, + "grad_norm": 1.94028639793396, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0717, + "grad_norm": 2.421177864074707, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0602, + "grad_norm": 1.947490930557251, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.084, + "grad_norm": 3.4976916313171387, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0146, + "grad_norm": 3.9808900356292725, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0583, + "grad_norm": 1.8078984022140503, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0687, + "grad_norm": 1.9551893472671509, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0133, + "grad_norm": 3.68121075630188, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0411, + "grad_norm": 1.987641453742981, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0527, + "grad_norm": 1.6725058555603027, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0516, + "grad_norm": 1.3503282070159912, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0439, + "grad_norm": 1.5804824829101562, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0481, + "grad_norm": 1.3769683837890625, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0108, + "grad_norm": 3.01991868019104, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0497, + "grad_norm": 1.416107177734375, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0377, + "grad_norm": 1.3515864610671997, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0607, + "grad_norm": 1.8614403009414673, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0679, + "grad_norm": 2.109128952026367, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0751, + "grad_norm": 1.5067026615142822, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0547, + "grad_norm": 1.5301975011825562, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0683, + "grad_norm": 2.2441554069519043, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0458, + "grad_norm": 1.8737249374389648, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0687, + "grad_norm": 1.9434070587158203, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0806, + "grad_norm": 1.8568007946014404, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.065, + "grad_norm": 2.0390608310699463, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0615, + "grad_norm": 1.7913262844085693, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0515, + "grad_norm": 2.496122121810913, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0501, + "grad_norm": 1.633486270904541, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.0171, + "grad_norm": 4.812644958496094, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.0756, + "grad_norm": 2.208841562271118, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0358, + "grad_norm": 1.725355625152588, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0173, + "grad_norm": 4.879479885101318, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.1386, + "grad_norm": 3.6769933700561523, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0712, + "grad_norm": 1.624098300933838, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0534, + "grad_norm": 2.2485837936401367, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0572, + "grad_norm": 1.977672815322876, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0515, + "grad_norm": 2.81058669090271, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0118, + "grad_norm": 3.3733158111572266, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0546, + "grad_norm": 1.634824275970459, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0549, + "grad_norm": 1.9184083938598633, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1835, + "grad_norm": 5.609441757202148, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0568, + "grad_norm": 1.4348167181015015, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0711, + "grad_norm": 1.6240220069885254, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0395, + "grad_norm": 1.7122279405593872, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.0092, + "grad_norm": 2.6746726036071777, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0516, + "grad_norm": 1.2466599941253662, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.0755, + "grad_norm": 2.3185651302337646, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0107, + "grad_norm": 3.2160799503326416, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0353, + "grad_norm": 1.6237694025039673, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.052, + "grad_norm": 1.6856698989868164, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0672, + "grad_norm": 1.7814722061157227, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0354, + "grad_norm": 1.4843939542770386, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0642, + "grad_norm": 1.6205660104751587, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0694, + "grad_norm": 2.024721384048462, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0587, + "grad_norm": 1.8312665224075317, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0411, + "grad_norm": 1.8380608558654785, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0597, + "grad_norm": 1.7451549768447876, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0773, + "grad_norm": 1.7938144207000732, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0639, + "grad_norm": 2.6028213500976562, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0686, + "grad_norm": 1.8541765213012695, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.0548, + "grad_norm": 1.739157795906067, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0131, + "grad_norm": 3.847865581512451, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.0556, + "grad_norm": 1.4072014093399048, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0656, + "grad_norm": 1.7529304027557373, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0472, + "grad_norm": 1.359227180480957, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0553, + "grad_norm": 1.8881477117538452, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0728, + "grad_norm": 1.792786717414856, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0589, + "grad_norm": 1.9897642135620117, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0641, + "grad_norm": 2.224968433380127, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0176, + "grad_norm": 4.579442977905273, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0465, + "grad_norm": 1.7030646800994873, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0638, + "grad_norm": 1.8251057863235474, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0532, + "grad_norm": 1.7170004844665527, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0146, + "grad_norm": 4.36711311340332, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0384, + "grad_norm": 1.4616270065307617, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.0536, + "grad_norm": 1.4146326780319214, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.058, + "grad_norm": 1.4087859392166138, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0131, + "grad_norm": 3.685961961746216, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.054, + "grad_norm": 2.024017572402954, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0127, + "grad_norm": 3.772671699523926, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0119, + "grad_norm": 3.4980599880218506, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0759, + "grad_norm": 2.152510643005371, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0408, + "grad_norm": 1.5923069715499878, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0085, + "grad_norm": 2.5293490886688232, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0694, + "grad_norm": 2.434215545654297, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.0084, + "grad_norm": 2.269744873046875, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0472, + "grad_norm": 2.460083246231079, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0346, + "grad_norm": 1.8150253295898438, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0436, + "grad_norm": 2.3509392738342285, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0413, + "grad_norm": 1.7899376153945923, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0068, + "grad_norm": 1.4986844062805176, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0719, + "grad_norm": 1.9978880882263184, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0407, + "grad_norm": 1.5322047472000122, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0057, + "grad_norm": 1.21915602684021, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0392, + "grad_norm": 1.8600904941558838, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.058, + "grad_norm": 1.788377285003662, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.073, + "grad_norm": 2.0460190773010254, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0631, + "grad_norm": 2.3501951694488525, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0655, + "grad_norm": 1.5405539274215698, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0527, + "grad_norm": 2.613194227218628, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0533, + "grad_norm": 2.3490524291992188, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.007, + "grad_norm": 1.7071534395217896, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0063, + "grad_norm": 1.578574776649475, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0586, + "grad_norm": 1.7500479221343994, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0489, + "grad_norm": 2.1021506786346436, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0505, + "grad_norm": 1.444482684135437, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0663, + "grad_norm": 2.043468475341797, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0429, + "grad_norm": 1.7074294090270996, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0655, + "grad_norm": 2.4234681129455566, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0766, + "grad_norm": 2.124605655670166, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.0549, + "grad_norm": 1.533837080001831, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0674, + "grad_norm": 1.8479790687561035, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0105, + "grad_norm": 2.9812541007995605, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0394, + "grad_norm": 1.3361161947250366, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0526, + "grad_norm": 1.8740735054016113, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0622, + "grad_norm": 2.8182497024536133, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.053, + "grad_norm": 1.3909233808517456, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0352, + "grad_norm": 1.3657585382461548, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0667, + "grad_norm": 1.9412925243377686, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0536, + "grad_norm": 1.9261113405227661, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0371, + "grad_norm": 1.7484430074691772, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0629, + "grad_norm": 1.5757131576538086, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0743, + "grad_norm": 2.2460429668426514, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0537, + "grad_norm": 2.029741048812866, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0363, + "grad_norm": 1.7011500597000122, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0773, + "grad_norm": 2.4450201988220215, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0597, + "grad_norm": 2.192077159881592, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0539, + "grad_norm": 1.464800238609314, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0762, + "grad_norm": 2.326375722885132, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0517, + "grad_norm": 1.547634482383728, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.0783, + "grad_norm": 2.2572309970855713, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0644, + "grad_norm": 2.7545583248138428, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0596, + "grad_norm": 1.4186100959777832, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0408, + "grad_norm": 1.7284655570983887, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0605, + "grad_norm": 1.7523491382598877, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0593, + "grad_norm": 1.346951961517334, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0618, + "grad_norm": 1.4633326530456543, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0401, + "grad_norm": 1.6125143766403198, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0703, + "grad_norm": 1.801979422569275, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.0168, + "grad_norm": 4.75988245010376, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0395, + "grad_norm": 1.7274175882339478, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0673, + "grad_norm": 1.813065767288208, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0149, + "grad_norm": 4.271875858306885, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0663, + "grad_norm": 2.038168430328369, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0129, + "grad_norm": 3.939451217651367, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0375, + "grad_norm": 1.818014144897461, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0589, + "grad_norm": 1.9127329587936401, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.062, + "grad_norm": 2.125767946243286, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0627, + "grad_norm": 1.3601936101913452, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0573, + "grad_norm": 1.9718780517578125, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0702, + "grad_norm": 1.8015897274017334, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0456, + "grad_norm": 2.072335958480835, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0567, + "grad_norm": 1.921351432800293, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.065, + "grad_norm": 1.5375345945358276, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0384, + "grad_norm": 1.3858362436294556, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0613, + "grad_norm": 1.8221303224563599, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.051, + "grad_norm": 1.5935691595077515, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.052, + "grad_norm": 1.4923861026763916, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0114, + "grad_norm": 3.3136603832244873, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0634, + "grad_norm": 1.8046377897262573, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.01, + "grad_norm": 2.8774094581604004, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0506, + "grad_norm": 1.315585732460022, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.051, + "grad_norm": 1.6535403728485107, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.069, + "grad_norm": 1.9435205459594727, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0599, + "grad_norm": 1.8793127536773682, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0098, + "grad_norm": 2.910207986831665, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0636, + "grad_norm": 2.1943273544311523, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0567, + "grad_norm": 1.5598511695861816, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0453, + "grad_norm": 1.9701513051986694, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0102, + "grad_norm": 3.0775904655456543, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0422, + "grad_norm": 1.8043560981750488, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0473, + "grad_norm": 1.871073842048645, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0514, + "grad_norm": 1.4562617540359497, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0367, + "grad_norm": 1.4301601648330688, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0504, + "grad_norm": 1.6110836267471313, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.074, + "grad_norm": 2.0486574172973633, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.1233, + "grad_norm": 3.3242132663726807, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0647, + "grad_norm": 1.307567834854126, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0609, + "grad_norm": 1.7847832441329956, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0095, + "grad_norm": 2.857769727706909, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0358, + "grad_norm": 1.3912484645843506, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.0389, + "grad_norm": 1.5175739526748657, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0126, + "grad_norm": 3.7526566982269287, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0558, + "grad_norm": 1.6538053750991821, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0538, + "grad_norm": 1.3453150987625122, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0608, + "grad_norm": 2.0873332023620605, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0611, + "grad_norm": 1.9410951137542725, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0769, + "grad_norm": 1.8411427736282349, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0111, + "grad_norm": 3.2430572509765625, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0722, + "grad_norm": 2.1307482719421387, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0377, + "grad_norm": 2.088995933532715, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0617, + "grad_norm": 1.546595811843872, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0683, + "grad_norm": 1.7900023460388184, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.057, + "grad_norm": 1.5026994943618774, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.0468, + "grad_norm": 1.8879090547561646, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0345, + "grad_norm": 1.3179066181182861, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0363, + "grad_norm": 1.297089695930481, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0465, + "grad_norm": 1.4451963901519775, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0593, + "grad_norm": 1.6601592302322388, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0633, + "grad_norm": 1.759940266609192, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0394, + "grad_norm": 1.640942096710205, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0107, + "grad_norm": 3.121732711791992, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.0343, + "grad_norm": 1.376590371131897, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0731, + "grad_norm": 1.5605193376541138, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.011, + "grad_norm": 3.3589043617248535, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0541, + "grad_norm": 1.0635466575622559, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0801, + "grad_norm": 2.1112594604492188, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0541, + "grad_norm": 1.915789008140564, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0097, + "grad_norm": 2.9668385982513428, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0785, + "grad_norm": 1.7575700283050537, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0092, + "grad_norm": 2.8856735229492188, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0842, + "grad_norm": 2.108201265335083, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0513, + "grad_norm": 1.646217942237854, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0323, + "grad_norm": 1.7345075607299805, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0508, + "grad_norm": 2.1174609661102295, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0794, + "grad_norm": 1.751968502998352, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.052, + "grad_norm": 2.0297329425811768, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.0414, + "grad_norm": 1.4483790397644043, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0387, + "grad_norm": 1.6367487907409668, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.0579, + "grad_norm": 1.947627305984497, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0746, + "grad_norm": 1.7073363065719604, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.07, + "grad_norm": 2.310190439224243, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0614, + "grad_norm": 1.841750979423523, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.01, + "grad_norm": 3.1444506645202637, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0522, + "grad_norm": 1.662224292755127, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0132, + "grad_norm": 3.9977800846099854, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0544, + "grad_norm": 1.3922324180603027, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.054, + "grad_norm": 2.120187759399414, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0536, + "grad_norm": 1.914109468460083, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0598, + "grad_norm": 1.831244707107544, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0573, + "grad_norm": 1.5706382989883423, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.1282, + "grad_norm": 2.7458832263946533, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0356, + "grad_norm": 1.4152108430862427, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0121, + "grad_norm": 3.4849400520324707, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0702, + "grad_norm": 1.8692002296447754, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.0601, + "grad_norm": 1.828239917755127, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0399, + "grad_norm": 1.8158057928085327, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0451, + "grad_norm": 1.7628754377365112, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.0679, + "grad_norm": 1.837315320968628, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0112, + "grad_norm": 3.3357973098754883, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0501, + "grad_norm": 1.5952306985855103, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0742, + "grad_norm": 2.5686585903167725, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0109, + "grad_norm": 3.133192777633667, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.068, + "grad_norm": 1.585485577583313, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0687, + "grad_norm": 2.0019702911376953, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0575, + "grad_norm": 1.6265766620635986, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0707, + "grad_norm": 1.6374586820602417, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0697, + "grad_norm": 2.4204654693603516, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0588, + "grad_norm": 2.1378262042999268, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0562, + "grad_norm": 2.214315414428711, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0124, + "grad_norm": 3.5861706733703613, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0487, + "grad_norm": 1.6121397018432617, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0556, + "grad_norm": 2.084545850753784, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0471, + "grad_norm": 1.8340671062469482, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0507, + "grad_norm": 1.5023232698440552, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.055, + "grad_norm": 1.5226930379867554, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0689, + "grad_norm": 1.8650307655334473, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.0687, + "grad_norm": 1.4976561069488525, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.012, + "grad_norm": 3.7820823192596436, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0644, + "grad_norm": 1.6768338680267334, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0508, + "grad_norm": 1.6384755373001099, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.0557, + "grad_norm": 1.67027747631073, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0443, + "grad_norm": 1.8305268287658691, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0398, + "grad_norm": 1.6602362394332886, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0479, + "grad_norm": 1.694201946258545, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.0693, + "grad_norm": 1.8437001705169678, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0512, + "grad_norm": 1.319399118423462, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0141, + "grad_norm": 4.160251617431641, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0473, + "grad_norm": 1.736594557762146, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0117, + "grad_norm": 3.6965503692626953, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0129, + "grad_norm": 3.8872127532958984, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0338, + "grad_norm": 1.6114709377288818, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0401, + "grad_norm": 1.4854273796081543, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0091, + "grad_norm": 2.8193323612213135, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0104, + "grad_norm": 3.194824457168579, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0082, + "grad_norm": 2.627159357070923, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0715, + "grad_norm": 2.015965223312378, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.0752, + "grad_norm": 1.8641659021377563, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0446, + "grad_norm": 1.8558416366577148, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0754, + "grad_norm": 2.614729881286621, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0781, + "grad_norm": 2.3581247329711914, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.044, + "grad_norm": 2.02897310256958, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0576, + "grad_norm": 1.8537285327911377, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0673, + "grad_norm": 2.3672072887420654, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.0406, + "grad_norm": 2.049578905105591, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0514, + "grad_norm": 1.8079686164855957, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.0467, + "grad_norm": 1.5584005117416382, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0073, + "grad_norm": 2.0741705894470215, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0501, + "grad_norm": 1.9797930717468262, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0514, + "grad_norm": 1.531952977180481, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0511, + "grad_norm": 2.27657413482666, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.0501, + "grad_norm": 1.5408827066421509, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0356, + "grad_norm": 1.3495177030563354, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0524, + "grad_norm": 2.264927864074707, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0085, + "grad_norm": 2.3997385501861572, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.0537, + "grad_norm": 2.03108811378479, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0625, + "grad_norm": 1.5735002756118774, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.0498, + "grad_norm": 1.4873791933059692, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0401, + "grad_norm": 1.646492600440979, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0092, + "grad_norm": 2.825364828109741, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0094, + "grad_norm": 2.7768924236297607, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0095, + "grad_norm": 2.475404977798462, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0416, + "grad_norm": 2.0638792514801025, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0544, + "grad_norm": 1.6516914367675781, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0534, + "grad_norm": 1.9903455972671509, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.061, + "grad_norm": 1.6336207389831543, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0484, + "grad_norm": 1.5735485553741455, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.0523, + "grad_norm": 1.7996323108673096, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0568, + "grad_norm": 1.6357063055038452, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.0097, + "grad_norm": 2.460446357727051, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0488, + "grad_norm": 1.7914141416549683, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0426, + "grad_norm": 2.875281572341919, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0535, + "grad_norm": 1.9656765460968018, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0582, + "grad_norm": 1.7268273830413818, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0625, + "grad_norm": 1.7748886346817017, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0624, + "grad_norm": 1.655421257019043, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0418, + "grad_norm": 1.857727289199829, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0628, + "grad_norm": 1.6072860956192017, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0079, + "grad_norm": 2.1282646656036377, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0097, + "grad_norm": 2.870497465133667, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0573, + "grad_norm": 2.2278597354888916, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0479, + "grad_norm": 1.6248372793197632, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0098, + "grad_norm": 3.043905258178711, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0515, + "grad_norm": 1.613357424736023, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.0391, + "grad_norm": 1.959555983543396, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.0085, + "grad_norm": 2.4167284965515137, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0638, + "grad_norm": 1.9236712455749512, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0359, + "grad_norm": 1.9113582372665405, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0083, + "grad_norm": 2.5152554512023926, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0471, + "grad_norm": 1.6409229040145874, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0695, + "grad_norm": 2.0613510608673096, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.057, + "grad_norm": 2.3862340450286865, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0733, + "grad_norm": 2.13395357131958, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0398, + "grad_norm": 1.8025071620941162, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0076, + "grad_norm": 2.0499792098999023, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.061, + "grad_norm": 1.6320290565490723, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0581, + "grad_norm": 1.9588946104049683, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.062, + "grad_norm": 1.8158897161483765, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0464, + "grad_norm": 2.4023096561431885, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0604, + "grad_norm": 2.0760178565979004, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0721, + "grad_norm": 1.8943363428115845, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0394, + "grad_norm": 1.6580768823623657, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.0575, + "grad_norm": 1.7064754962921143, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.1451, + "grad_norm": 5.286960124969482, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0367, + "grad_norm": 1.5256696939468384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0352, + "grad_norm": 1.4353508949279785, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0544, + "grad_norm": 1.449508547782898, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0088, + "grad_norm": 2.6737008094787598, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.054, + "grad_norm": 1.1922411918640137, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0108, + "grad_norm": 3.180657386779785, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0636, + "grad_norm": 1.900195598602295, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0602, + "grad_norm": 2.505511522293091, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0516, + "grad_norm": 1.517896056175232, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0653, + "grad_norm": 1.5359817743301392, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.062, + "grad_norm": 2.56500244140625, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0616, + "grad_norm": 1.2327522039413452, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0641, + "grad_norm": 2.0313050746917725, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.0509, + "grad_norm": 1.9020798206329346, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0573, + "grad_norm": 1.3576561212539673, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0359, + "grad_norm": 1.6285313367843628, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0779, + "grad_norm": 2.119893789291382, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0459, + "grad_norm": 1.8730247020721436, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.0359, + "grad_norm": 1.5724204778671265, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0375, + "grad_norm": 1.7161457538604736, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0522, + "grad_norm": 1.3714388608932495, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0368, + "grad_norm": 1.6326324939727783, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0526, + "grad_norm": 1.4099246263504028, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0343, + "grad_norm": 1.331606149673462, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0521, + "grad_norm": 2.03346586227417, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0738, + "grad_norm": 2.287825584411621, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0711, + "grad_norm": 1.560683012008667, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0483, + "grad_norm": 1.860205888748169, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0418, + "grad_norm": 1.6539009809494019, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0669, + "grad_norm": 1.5473995208740234, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0488, + "grad_norm": 1.3596010208129883, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0407, + "grad_norm": 1.8577399253845215, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0639, + "grad_norm": 2.693002462387085, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0146, + "grad_norm": 4.3713555335998535, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0702, + "grad_norm": 1.8829140663146973, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0145, + "grad_norm": 4.203199863433838, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0418, + "grad_norm": 1.0440939664840698, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0658, + "grad_norm": 1.5156137943267822, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0506, + "grad_norm": 1.6226084232330322, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.087, + "grad_norm": 1.8399536609649658, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.0607, + "grad_norm": 2.031243324279785, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.0609, + "grad_norm": 1.581013798713684, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0149, + "grad_norm": 4.233753681182861, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0698, + "grad_norm": 1.890411615371704, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0529, + "grad_norm": 1.3680751323699951, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0528, + "grad_norm": 1.9651073217391968, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0133, + "grad_norm": 3.887544631958008, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.05, + "grad_norm": 1.304778814315796, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.071, + "grad_norm": 1.9661753177642822, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0557, + "grad_norm": 1.5037291049957275, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0372, + "grad_norm": 1.4804255962371826, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0645, + "grad_norm": 1.577778697013855, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0399, + "grad_norm": 1.5963507890701294, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0612, + "grad_norm": 1.7424527406692505, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.0377, + "grad_norm": 1.4296543598175049, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0378, + "grad_norm": 1.4681419134140015, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0385, + "grad_norm": 1.876345157623291, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0454, + "grad_norm": 1.3991385698318481, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0706, + "grad_norm": 1.6286864280700684, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.0409, + "grad_norm": 1.7534390687942505, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.1302, + "grad_norm": 4.238317966461182, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.0525, + "grad_norm": 2.2462339401245117, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0609, + "grad_norm": 1.5136423110961914, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0595, + "grad_norm": 1.4645228385925293, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0485, + "grad_norm": 1.4663139581680298, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0117, + "grad_norm": 3.569246768951416, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0765, + "grad_norm": 1.4224154949188232, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0517, + "grad_norm": 1.4875210523605347, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0123, + "grad_norm": 3.643899440765381, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0358, + "grad_norm": 1.7132638692855835, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0396, + "grad_norm": 1.291243553161621, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0611, + "grad_norm": 1.6885188817977905, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0507, + "grad_norm": 1.215349555015564, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0508, + "grad_norm": 1.5074315071105957, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0593, + "grad_norm": 1.500303030014038, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0696, + "grad_norm": 2.0285537242889404, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.051, + "grad_norm": 1.3399317264556885, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0479, + "grad_norm": 1.868754506111145, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0123, + "grad_norm": 3.5505826473236084, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0384, + "grad_norm": 1.1001877784729004, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.0503, + "grad_norm": 1.5732758045196533, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0569, + "grad_norm": 1.4768040180206299, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.0376, + "grad_norm": 2.298859119415283, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0626, + "grad_norm": 1.4698207378387451, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0527, + "grad_norm": 1.462391972541809, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0751, + "grad_norm": 2.242673873901367, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0633, + "grad_norm": 1.4788683652877808, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0523, + "grad_norm": 1.5662829875946045, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0496, + "grad_norm": 1.2137081623077393, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0144, + "grad_norm": 3.972593307495117, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0612, + "grad_norm": 2.0851247310638428, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0351, + "grad_norm": 1.7115992307662964, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0543, + "grad_norm": 1.7121071815490723, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0398, + "grad_norm": 2.520775318145752, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0588, + "grad_norm": 1.4704424142837524, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0393, + "grad_norm": 1.1732555627822876, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0126, + "grad_norm": 3.8587839603424072, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.0154, + "grad_norm": 4.2589006423950195, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0525, + "grad_norm": 1.5793870687484741, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0711, + "grad_norm": 1.637081265449524, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0367, + "grad_norm": 1.405205488204956, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0122, + "grad_norm": 3.7381093502044678, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.0595, + "grad_norm": 1.4563549757003784, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.012, + "grad_norm": 3.3752598762512207, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0575, + "grad_norm": 1.6581268310546875, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.037, + "grad_norm": 1.6496632099151611, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0435, + "grad_norm": 2.816823959350586, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0691, + "grad_norm": 1.9923897981643677, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0601, + "grad_norm": 1.9515984058380127, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0097, + "grad_norm": 3.0719552040100098, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0641, + "grad_norm": 1.8086748123168945, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.067, + "grad_norm": 1.6446064710617065, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0101, + "grad_norm": 3.0983476638793945, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0362, + "grad_norm": 1.6780548095703125, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.054, + "grad_norm": 1.5340514183044434, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0562, + "grad_norm": 1.6704845428466797, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0647, + "grad_norm": 2.0944159030914307, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0497, + "grad_norm": 1.6780622005462646, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.0531, + "grad_norm": 1.5871188640594482, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.061, + "grad_norm": 1.572225570678711, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.0636, + "grad_norm": 1.7540369033813477, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0516, + "grad_norm": 1.9117010831832886, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0516, + "grad_norm": 1.8945181369781494, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1903, + "grad_norm": 7.168573379516602, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0584, + "grad_norm": 1.7484742403030396, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0592, + "grad_norm": 1.998748540878296, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0132, + "grad_norm": 3.7218382358551025, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0397, + "grad_norm": 1.7368042469024658, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0747, + "grad_norm": 1.7804408073425293, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0564, + "grad_norm": 1.812559962272644, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0359, + "grad_norm": 1.5748106241226196, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.1015, + "grad_norm": 2.9346442222595215, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.0714, + "grad_norm": 2.8724288940429688, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0544, + "grad_norm": 1.6409680843353271, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0569, + "grad_norm": 1.441733479499817, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0709, + "grad_norm": 2.3944602012634277, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0593, + "grad_norm": 2.0737223625183105, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.011, + "grad_norm": 3.4782493114471436, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0115, + "grad_norm": 3.5657458305358887, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.0598, + "grad_norm": 1.5167820453643799, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0507, + "grad_norm": 1.6942130327224731, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.05, + "grad_norm": 1.4450113773345947, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0672, + "grad_norm": 1.7840543985366821, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0114, + "grad_norm": 3.6806554794311523, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0433, + "grad_norm": 2.5975944995880127, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.048, + "grad_norm": 1.2934935092926025, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0129, + "grad_norm": 3.9428789615631104, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0106, + "grad_norm": 3.178393840789795, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0601, + "grad_norm": 1.3654727935791016, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0372, + "grad_norm": 1.596958041191101, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0407, + "grad_norm": 1.3870348930358887, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0398, + "grad_norm": 1.8837169408798218, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0685, + "grad_norm": 2.1320674419403076, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0824, + "grad_norm": 2.3401284217834473, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0107, + "grad_norm": 3.2646677494049072, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.053, + "grad_norm": 1.7195311784744263, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0388, + "grad_norm": 1.4336844682693481, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0496, + "grad_norm": 1.5110867023468018, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0106, + "grad_norm": 3.0311079025268555, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0536, + "grad_norm": 1.9689549207687378, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0761, + "grad_norm": 2.2891626358032227, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0099, + "grad_norm": 2.886558771133423, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0509, + "grad_norm": 2.247649669647217, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0396, + "grad_norm": 1.8190995454788208, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0681, + "grad_norm": 1.9473356008529663, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0583, + "grad_norm": 1.7244383096694946, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0497, + "grad_norm": 1.471281886100769, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0105, + "grad_norm": 3.1323492527008057, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0587, + "grad_norm": 1.6258044242858887, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0396, + "grad_norm": 3.7344205379486084, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0669, + "grad_norm": 1.567430853843689, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0403, + "grad_norm": 2.391710042953491, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0731, + "grad_norm": 1.7387372255325317, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0346, + "grad_norm": 1.5562756061553955, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0094, + "grad_norm": 2.8271360397338867, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0458, + "grad_norm": 2.486022472381592, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0432, + "grad_norm": 1.4174907207489014, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0685, + "grad_norm": 1.9511269330978394, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0541, + "grad_norm": 1.7855056524276733, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0381, + "grad_norm": 1.345107913017273, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0405, + "grad_norm": 2.1388049125671387, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.065, + "grad_norm": 1.9286760091781616, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.0084, + "grad_norm": 2.553018808364868, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0591, + "grad_norm": 1.3521795272827148, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0407, + "grad_norm": 2.3110647201538086, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0087, + "grad_norm": 2.560931921005249, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.1207, + "grad_norm": 3.6795732975006104, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0079, + "grad_norm": 2.1008386611938477, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0087, + "grad_norm": 2.5367555618286133, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0518, + "grad_norm": 2.0541486740112305, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0618, + "grad_norm": 1.8797075748443604, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0628, + "grad_norm": 2.0876829624176025, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.0453, + "grad_norm": 1.7904268503189087, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.009, + "grad_norm": 2.73040771484375, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0617, + "grad_norm": 1.6844722032546997, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.0431, + "grad_norm": 1.8085075616836548, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0554, + "grad_norm": 1.8000997304916382, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0608, + "grad_norm": 1.8177446126937866, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0624, + "grad_norm": 1.5957430601119995, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0615, + "grad_norm": 1.5245059728622437, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0087, + "grad_norm": 2.8260550498962402, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0491, + "grad_norm": 1.5616376399993896, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0552, + "grad_norm": 1.530611276626587, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0563, + "grad_norm": 1.5877563953399658, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.034, + "grad_norm": 1.3671666383743286, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0447, + "grad_norm": 1.4045659303665161, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0523, + "grad_norm": 1.3664851188659668, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0545, + "grad_norm": 1.9731861352920532, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.056, + "grad_norm": 1.9783090353012085, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0103, + "grad_norm": 3.2062110900878906, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0356, + "grad_norm": 1.8231993913650513, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.0525, + "grad_norm": 1.708391785621643, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.0794, + "grad_norm": 2.159344434738159, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0815, + "grad_norm": 1.9803351163864136, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0442, + "grad_norm": 2.2135045528411865, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0082, + "grad_norm": 2.504026174545288, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0524, + "grad_norm": 2.4293482303619385, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0543, + "grad_norm": 1.5671586990356445, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0549, + "grad_norm": 2.1507840156555176, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0561, + "grad_norm": 1.4668017625808716, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.008, + "grad_norm": 2.4691226482391357, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0104, + "grad_norm": 3.135504722595215, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0442, + "grad_norm": 1.5039496421813965, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.035, + "grad_norm": 1.5489939451217651, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.0687, + "grad_norm": 1.601294994354248, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0629, + "grad_norm": 1.7154121398925781, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0587, + "grad_norm": 2.0388171672821045, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.051, + "grad_norm": 1.9510704278945923, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0512, + "grad_norm": 1.7245160341262817, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0465, + "grad_norm": 1.383158802986145, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.054, + "grad_norm": 2.2401952743530273, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0516, + "grad_norm": 2.7115116119384766, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0095, + "grad_norm": 2.8770017623901367, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0618, + "grad_norm": 1.8771051168441772, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0524, + "grad_norm": 1.3788121938705444, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0582, + "grad_norm": 1.583976149559021, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0802, + "grad_norm": 1.9991214275360107, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0085, + "grad_norm": 2.6479129791259766, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.06, + "grad_norm": 1.4170489311218262, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0502, + "grad_norm": 1.5151011943817139, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0639, + "grad_norm": 1.8262159824371338, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.039, + "grad_norm": 1.5687544345855713, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0601, + "grad_norm": 1.4482007026672363, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0549, + "grad_norm": 2.2203757762908936, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0089, + "grad_norm": 2.697735071182251, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0595, + "grad_norm": 1.751516342163086, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0499, + "grad_norm": 1.9196858406066895, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0385, + "grad_norm": 1.7155030965805054, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0089, + "grad_norm": 2.759751558303833, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0393, + "grad_norm": 1.8272897005081177, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.0626, + "grad_norm": 1.7148770093917847, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2cfbcdcee3753e484b0a1b3adcb35ee225098 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-0.5B-Instruct", + "unsloth_available": false, + "train_runtime": 234.6302, + "train_loss": 0.19233327957964502, + "train_metrics": { + "train_runtime": 234.6302, + "train_samples_per_second": 17.048, + "train_steps_per_second": 8.524, + "total_flos": 3810849801251328.0, + "train_loss": 0.19233327957964502 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..7d529fa55bcfe3e68b31a5702ee3f96c63526167 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 2.158, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.557, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 2.04, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the", + "candidate_id": "cand_07", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.059, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses", + "candidate_id": "cand_05", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 2.007, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence", + "candidate_id": null, + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.127, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5778936849f9a5bb988c315271fbf3c3507aba26 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "sft-baseline", + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "model_index": 1, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 1, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c5d1cfe6fab1b4a4647f03f5ca461b1739180f --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json @@ -0,0 +1,36011 @@ +[ + { + "loss": 2.9686, + "grad_norm": 1.1798820495605469, + "learning_rate": 2e-05, + "num_tokens": 91.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 2.9639, + "grad_norm": 1.146132469177246, + "learning_rate": 1.9995e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 1.2609, + "grad_norm": 0.2891564667224884, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 694.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 2.9479, + "grad_norm": 1.1511788368225098, + "learning_rate": 1.9985000000000003e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.002, + "step": 4 + }, + { + "loss": 0.8201, + "grad_norm": 0.27247434854507446, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1297.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 1.1688, + "grad_norm": 0.30153799057006836, + "learning_rate": 1.9975e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 2.927, + "grad_norm": 1.123976469039917, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 1900.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 2.9219, + "grad_norm": 1.1258331537246704, + "learning_rate": 1.9965e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 1.2624, + "grad_norm": 0.3105297088623047, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 2503.0, + "mean_token_accuracy": 0.7592955231666565, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.8468, + "grad_norm": 0.27270445227622986, + "learning_rate": 1.9955e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.835616409778595, + "epoch": 0.005, + "step": 10 + }, + { + "loss": 1.1895, + "grad_norm": 0.31019389629364014, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 3527.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 2.8961, + "grad_norm": 1.0758286714553833, + "learning_rate": 1.9945e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 1.1822, + "grad_norm": 0.3052140772342682, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 4130.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 2.8831, + "grad_norm": 1.0789313316345215, + "learning_rate": 1.9935e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.8383, + "grad_norm": 0.2903873026371002, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 4733.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 1.2037, + "grad_norm": 0.3023833632469177, + "learning_rate": 1.9925e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 1.2477, + "grad_norm": 0.28835517168045044, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 5757.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 1.237, + "grad_norm": 0.30421048402786255, + "learning_rate": 1.9915e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 2.8549, + "grad_norm": 1.0703911781311035, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 6360.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 1.2092, + "grad_norm": 0.30991482734680176, + "learning_rate": 1.9905e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7690802216529846, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 1.2362, + "grad_norm": 0.3097628951072693, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 7384.0, + "mean_token_accuracy": 0.7769080400466919, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 1.223, + "grad_norm": 0.31258082389831543, + "learning_rate": 1.9895000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 2.8321, + "grad_norm": 1.0650557279586792, + "learning_rate": 1.989e-05, + "num_tokens": 7987.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 1.1381, + "grad_norm": 0.31106889247894287, + "learning_rate": 1.9885e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.8059, + "grad_norm": 0.28179118037223816, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 9011.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 2.8152, + "grad_norm": 1.0609599351882935, + "learning_rate": 1.9875000000000002e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 2.8078, + "grad_norm": 1.06212317943573, + "learning_rate": 1.987e-05, + "num_tokens": 9193.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 1.205, + "grad_norm": 0.3027011752128601, + "learning_rate": 1.9865e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 1.1295, + "grad_norm": 0.30131977796554565, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 10217.0, + "mean_token_accuracy": 0.7925636172294617, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 2.7894, + "grad_norm": 1.0723512172698975, + "learning_rate": 1.9855000000000002e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 1.1157, + "grad_norm": 0.30370256304740906, + "learning_rate": 1.985e-05, + "num_tokens": 10820.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 1.2198, + "grad_norm": 0.3102725148200989, + "learning_rate": 1.9845e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7710371613502502, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 2.7699, + "grad_norm": 1.0780471563339233, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 11423.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 2.7633, + "grad_norm": 1.0721458196640015, + "learning_rate": 1.9835000000000002e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.8241, + "grad_norm": 0.2753015458583832, + "learning_rate": 1.983e-05, + "num_tokens": 12026.0, + "mean_token_accuracy": 0.8375734090805054, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 1.2029, + "grad_norm": 0.32459118962287903, + "learning_rate": 1.9825e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 2.7393, + "grad_norm": 1.089471459388733, + "learning_rate": 1.982e-05, + "num_tokens": 12629.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 2.7339, + "grad_norm": 1.085958480834961, + "learning_rate": 1.9815000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 2.7235, + "grad_norm": 1.1013903617858887, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 12811.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 1.1925, + "grad_norm": 0.322603315114975, + "learning_rate": 1.9805e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7729941010475159, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 1.0755, + "grad_norm": 0.33030447363853455, + "learning_rate": 1.98e-05, + "num_tokens": 13835.0, + "mean_token_accuracy": 0.7886496782302856, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.8072, + "grad_norm": 0.292123407125473, + "learning_rate": 1.9795000000000003e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.7719, + "grad_norm": 0.2785574495792389, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 14859.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 2.6826, + "grad_norm": 1.1196017265319824, + "learning_rate": 1.9785e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 2.6763, + "grad_norm": 1.1198991537094116, + "learning_rate": 1.978e-05, + "num_tokens": 15041.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 1.0823, + "grad_norm": 0.3456343412399292, + "learning_rate": 1.9775000000000003e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 1.1172, + "grad_norm": 0.3377469480037689, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 16065.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 1.19, + "grad_norm": 0.3273194134235382, + "learning_rate": 1.9765e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.7808219194412231, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 1.0897, + "grad_norm": 0.330640584230423, + "learning_rate": 1.976e-05, + "num_tokens": 17089.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 2.6381, + "grad_norm": 1.1452019214630127, + "learning_rate": 1.9755000000000003e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.7974, + "grad_norm": 0.30913424491882324, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 17692.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 1.175, + "grad_norm": 0.3387100100517273, + "learning_rate": 1.9745e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 1.1322, + "grad_norm": 0.3353443443775177, + "learning_rate": 1.974e-05, + "num_tokens": 18716.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 2.6086, + "grad_norm": 1.1715646982192993, + "learning_rate": 1.9735000000000003e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 2.5992, + "grad_norm": 1.1846489906311035, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 18898.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 2.5913, + "grad_norm": 1.1861159801483154, + "learning_rate": 1.9725000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 1.1598, + "grad_norm": 0.3380836546421051, + "learning_rate": 1.972e-05, + "num_tokens": 19501.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 1.1193, + "grad_norm": 0.34247249364852905, + "learning_rate": 1.9715000000000004e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 2.5644, + "grad_norm": 1.205854892730713, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 20104.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 2.5553, + "grad_norm": 1.211520791053772, + "learning_rate": 1.9705000000000002e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 2.5452, + "grad_norm": 1.2238597869873047, + "learning_rate": 1.97e-05, + "num_tokens": 20286.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 1.1531, + "grad_norm": 0.3495417535305023, + "learning_rate": 1.9695e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 1.0714, + "grad_norm": 0.3549030125141144, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 21310.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.765, + "grad_norm": 0.3008621335029602, + "learning_rate": 1.9685000000000002e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 1.0392, + "grad_norm": 0.3398958444595337, + "learning_rate": 1.968e-05, + "num_tokens": 22334.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 1.0477, + "grad_norm": 0.35012176632881165, + "learning_rate": 1.9675e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 2.4882, + "grad_norm": 1.2684752941131592, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 22937.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 2.478, + "grad_norm": 1.2892162799835205, + "learning_rate": 1.9665000000000002e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 2.4664, + "grad_norm": 1.296135663986206, + "learning_rate": 1.966e-05, + "num_tokens": 23119.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.7605, + "grad_norm": 0.3300800323486328, + "learning_rate": 1.9655e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.7663, + "grad_norm": 0.33007505536079407, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 24143.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 2.4349, + "grad_norm": 1.3247182369232178, + "learning_rate": 1.9645e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 1.0354, + "grad_norm": 0.3528023660182953, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 24746.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.738, + "grad_norm": 0.3283436894416809, + "learning_rate": 1.9635e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 1.1271, + "grad_norm": 0.38431045413017273, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 25770.0, + "mean_token_accuracy": 0.7847357988357544, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": 1.0373, + "grad_norm": 0.3673364818096161, + "learning_rate": 1.9625e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.7984344363212585, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 1.156, + "grad_norm": 0.3851627707481384, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 26794.0, + "mean_token_accuracy": 0.7788649797439575, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 2.3789, + "grad_norm": 1.3850467205047607, + "learning_rate": 1.9615e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 2.3734, + "grad_norm": 1.3814043998718262, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 26976.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 2.3599, + "grad_norm": 1.3965320587158203, + "learning_rate": 1.9605e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 2.3458, + "grad_norm": 1.4337000846862793, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 27158.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.7631, + "grad_norm": 0.328967422246933, + "learning_rate": 1.9595e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 1.0816, + "grad_norm": 0.40056440234184265, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 28182.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.761, + "grad_norm": 0.34349334239959717, + "learning_rate": 1.9585e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.7308, + "grad_norm": 0.35714098811149597, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 29206.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 2.2886, + "grad_norm": 1.4950672388076782, + "learning_rate": 1.9575e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 2.2801, + "grad_norm": 1.5058231353759766, + "learning_rate": 1.957e-05, + "num_tokens": 29388.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 2.2683, + "grad_norm": 1.5141775608062744, + "learning_rate": 1.9565e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.9814, + "grad_norm": 0.3899815082550049, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 29991.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 1.1155, + "grad_norm": 0.40274983644485474, + "learning_rate": 1.9555e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.78669273853302, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 2.2309, + "grad_norm": 1.5758429765701294, + "learning_rate": 1.955e-05, + "num_tokens": 30594.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 1.0635, + "grad_norm": 0.4182218015193939, + "learning_rate": 1.9545e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.7083, + "grad_norm": 0.35819146037101746, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 31618.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 2.1959, + "grad_norm": 1.6126611232757568, + "learning_rate": 1.9535000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 2.1797, + "grad_norm": 1.676061987876892, + "learning_rate": 1.953e-05, + "num_tokens": 31800.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 1.0347, + "grad_norm": 0.4216737151145935, + "learning_rate": 1.9525e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.6884, + "grad_norm": 0.39531153440475464, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 32824.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 2.1441, + "grad_norm": 1.7453250885009766, + "learning_rate": 1.9515000000000002e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 2.1265, + "grad_norm": 1.7851935625076294, + "learning_rate": 1.951e-05, + "num_tokens": 33006.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 2.112, + "grad_norm": 1.830625057220459, + "learning_rate": 1.9505e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 2.0989, + "grad_norm": 1.851873755455017, + "learning_rate": 1.95e-05, + "num_tokens": 33188.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.6824, + "grad_norm": 0.39206984639167786, + "learning_rate": 1.9495000000000002e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.6874, + "grad_norm": 0.3998919725418091, + "learning_rate": 1.949e-05, + "num_tokens": 34212.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 1.0692, + "grad_norm": 0.45781052112579346, + "learning_rate": 1.9485e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.052, + "step": 104 + }, + { + "loss": 1.061, + "grad_norm": 0.4857180714607239, + "learning_rate": 1.948e-05, + "num_tokens": 35236.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.9418, + "grad_norm": 0.4719521701335907, + "learning_rate": 1.9475000000000002e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.9888, + "grad_norm": 0.4797465205192566, + "learning_rate": 1.947e-05, + "num_tokens": 36260.0, + "mean_token_accuracy": 0.7964774966239929, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 1.994, + "grad_norm": 2.2058191299438477, + "learning_rate": 1.9465e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.7016, + "grad_norm": 0.41740846633911133, + "learning_rate": 1.946e-05, + "num_tokens": 36863.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.6818, + "grad_norm": 0.43658050894737244, + "learning_rate": 1.9455000000000003e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.6655, + "grad_norm": 0.46398866176605225, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 37887.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 1.9355, + "grad_norm": 2.4030585289001465, + "learning_rate": 1.9445e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 1.0308, + "grad_norm": 0.47935715317726135, + "learning_rate": 1.944e-05, + "num_tokens": 38490.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": 0.6529, + "grad_norm": 0.5175711512565613, + "learning_rate": 1.9435000000000003e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 1.9, + "grad_norm": 2.3800323009490967, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 39093.0, + "mean_token_accuracy": 0.6888889074325562, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 1.0589, + "grad_norm": 0.5446810722351074, + "learning_rate": 1.9425e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 1.8661, + "grad_norm": 2.2952208518981934, + "learning_rate": 1.942e-05, + "num_tokens": 39696.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 1.8546, + "grad_norm": 2.2471399307250977, + "learning_rate": 1.9415000000000003e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 1.8394, + "grad_norm": 2.1859543323516846, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 39878.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.6737, + "grad_norm": 0.5614652633666992, + "learning_rate": 1.9405e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.6406, + "grad_norm": 0.5995651483535767, + "learning_rate": 1.94e-05, + "num_tokens": 40902.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.9218, + "grad_norm": 0.6819480657577515, + "learning_rate": 1.9395000000000003e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.9464, + "grad_norm": 0.6670010089874268, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 41926.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.9323, + "grad_norm": 0.8481072187423706, + "learning_rate": 1.9385e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8023483157157898, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.6372, + "grad_norm": 0.5398988127708435, + "learning_rate": 1.938e-05, + "num_tokens": 42950.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.6362, + "grad_norm": 0.5465712547302246, + "learning_rate": 1.9375e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 1.7297, + "grad_norm": 2.4601035118103027, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 43553.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.6423, + "grad_norm": 0.5248544812202454, + "learning_rate": 1.9365000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 1.7024, + "grad_norm": 2.7017173767089844, + "learning_rate": 1.936e-05, + "num_tokens": 44156.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.8623, + "grad_norm": 0.6321293711662292, + "learning_rate": 1.9355e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.8852, + "grad_norm": 0.7586547136306763, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 45180.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 1.6632, + "grad_norm": 3.066443920135498, + "learning_rate": 1.9345000000000002e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 1.642, + "grad_norm": 3.3219645023345947, + "learning_rate": 1.934e-05, + "num_tokens": 45362.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 1.623, + "grad_norm": 3.5062637329101562, + "learning_rate": 1.9335e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 1.6017, + "grad_norm": 3.623307228088379, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 45544.0, + "mean_token_accuracy": 0.7111111283302307, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.8752, + "grad_norm": 0.7358177900314331, + "learning_rate": 1.9325000000000002e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.9563, + "grad_norm": 0.8089514970779419, + "learning_rate": 1.932e-05, + "num_tokens": 46568.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.9479, + "grad_norm": 0.8843920826911926, + "learning_rate": 1.9315e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.8003913760185242, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 1.5158, + "grad_norm": 3.546642303466797, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 47171.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.5831, + "grad_norm": 0.7032448053359985, + "learning_rate": 1.9305000000000002e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.8191, + "grad_norm": 0.9835058450698853, + "learning_rate": 1.93e-05, + "num_tokens": 48195.0, + "mean_token_accuracy": 0.8219178318977356, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.5936, + "grad_norm": 0.7396312952041626, + "learning_rate": 1.9295e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 1.4418, + "grad_norm": 3.6846494674682617, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 48798.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 1.4276, + "grad_norm": 3.8224549293518066, + "learning_rate": 1.9285000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 1.4024, + "grad_norm": 3.874878168106079, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 48980.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 1.3769, + "grad_norm": 3.8388218879699707, + "learning_rate": 1.9275e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.7444444298744202, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 1.3516, + "grad_norm": 3.6529314517974854, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 49162.0, + "mean_token_accuracy": 0.7555555701255798, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 1.3215, + "grad_norm": 3.6978349685668945, + "learning_rate": 1.9265000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.7666666507720947, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 1.2966, + "grad_norm": 3.7301321029663086, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 49344.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.9111, + "grad_norm": 0.9517998695373535, + "learning_rate": 1.9255e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.8140900135040283, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 1.2327, + "grad_norm": 4.175051212310791, + "learning_rate": 1.925e-05, + "num_tokens": 49947.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 1.2076, + "grad_norm": 4.348862171173096, + "learning_rate": 1.9245000000000003e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.5662, + "grad_norm": 0.9280498623847961, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 50550.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.8844, + "grad_norm": 1.042202353477478, + "learning_rate": 1.9235e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 1.1432, + "grad_norm": NaN, + "learning_rate": 1.923e-05, + "num_tokens": 51153.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 1.1364, + "grad_norm": 3.4773733615875244, + "learning_rate": 1.923e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.7888888716697693, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.5305, + "grad_norm": 1.0232493877410889, + "learning_rate": 1.9225000000000003e-05, + "num_tokens": 51756.0, + "mean_token_accuracy": 0.8806262016296387, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.8352, + "grad_norm": 1.172676920890808, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.5667, + "grad_norm": 1.041461706161499, + "learning_rate": 1.9215e-05, + "num_tokens": 52780.0, + "mean_token_accuracy": 0.878669261932373, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.5104, + "grad_norm": 1.050549030303955, + "learning_rate": 1.921e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.875, + "grad_norm": 1.1163139343261719, + "learning_rate": 1.9205000000000003e-05, + "num_tokens": 53804.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.799, + "grad_norm": 0.9202898740768433, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 1.0468, + "grad_norm": 6.722721576690674, + "learning_rate": 1.9195000000000002e-05, + "num_tokens": 54407.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 1.032, + "grad_norm": 6.30849027633667, + "learning_rate": 1.919e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.8387, + "grad_norm": 0.8642046451568604, + "learning_rate": 1.9185000000000004e-05, + "num_tokens": 55010.0, + "mean_token_accuracy": 0.8336594700813293, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.8299, + "grad_norm": 0.8796883821487427, + "learning_rate": 1.918e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.8297455906867981, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.9957, + "grad_norm": 6.16769552230835, + "learning_rate": 1.9175000000000002e-05, + "num_tokens": 55613.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.7521, + "grad_norm": 0.8700262904167175, + "learning_rate": 1.917e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.8532289862632751, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.5251, + "grad_norm": 1.2144312858581543, + "learning_rate": 1.9165000000000004e-05, + "num_tokens": 56637.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": 0.76, + "grad_norm": 0.9009570479393005, + "learning_rate": 1.916e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.941, + "grad_norm": 5.8355841636657715, + "learning_rate": 1.9155000000000002e-05, + "num_tokens": 57240.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.928, + "grad_norm": 5.541483402252197, + "learning_rate": 1.915e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.716, + "grad_norm": 1.0414000749588013, + "learning_rate": 1.9145000000000004e-05, + "num_tokens": 57843.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.8929, + "grad_norm": 4.810738563537598, + "learning_rate": 1.914e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.8111110925674438, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.7684, + "grad_norm": 1.2132883071899414, + "learning_rate": 1.9135000000000002e-05, + "num_tokens": 58446.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.6497, + "grad_norm": 1.1370697021484375, + "learning_rate": 1.913e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.6995, + "grad_norm": 1.2495081424713135, + "learning_rate": 1.9125000000000004e-05, + "num_tokens": 59470.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.4539, + "grad_norm": 1.0713244676589966, + "learning_rate": 1.912e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.8311, + "grad_norm": 8.016578674316406, + "learning_rate": 1.9115000000000002e-05, + "num_tokens": 60073.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.7657, + "grad_norm": 1.6656423807144165, + "learning_rate": 1.911e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.7687, + "grad_norm": 1.0611323118209839, + "learning_rate": 1.9105e-05, + "num_tokens": 61097.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.8062, + "grad_norm": 10.057961463928223, + "learning_rate": 1.91e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.4494, + "grad_norm": 0.8912132978439331, + "learning_rate": 1.9095000000000003e-05, + "num_tokens": 61700.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.7813, + "grad_norm": 8.121318817138672, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8444444537162781, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.762, + "grad_norm": 7.607242584228516, + "learning_rate": 1.9085e-05, + "num_tokens": 61882.0, + "mean_token_accuracy": 0.855555534362793, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.7692, + "grad_norm": 1.015843391418457, + "learning_rate": 1.908e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.7587, + "grad_norm": 0.9659166932106018, + "learning_rate": 1.9075000000000003e-05, + "num_tokens": 62906.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.6702, + "grad_norm": 1.6121653318405151, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.7191, + "grad_norm": 5.08962345123291, + "learning_rate": 1.9065e-05, + "num_tokens": 63509.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.7033, + "grad_norm": 1.2752808332443237, + "learning_rate": 1.906e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.8493150472640991, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.7025, + "grad_norm": 5.420579433441162, + "learning_rate": 1.9055e-05, + "num_tokens": 64112.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.6507, + "grad_norm": 0.9945167899131775, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.5894, + "grad_norm": 1.0229939222335815, + "learning_rate": 1.9045e-05, + "num_tokens": 65136.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.6627, + "grad_norm": 9.837233543395996, + "learning_rate": 1.904e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.7, + "grad_norm": 1.4510327577590942, + "learning_rate": 1.9035e-05, + "num_tokens": 65739.0, + "mean_token_accuracy": 0.8551859259605408, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.6437, + "grad_norm": 11.414746284484863, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.631, + "grad_norm": 10.233067512512207, + "learning_rate": 1.9025e-05, + "num_tokens": 65921.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.6945, + "grad_norm": 1.3608763217926025, + "learning_rate": 1.902e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.6546, + "grad_norm": 1.217339038848877, + "learning_rate": 1.9015e-05, + "num_tokens": 66945.0, + "mean_token_accuracy": 0.8571428656578064, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.6805, + "grad_norm": 1.5453741550445557, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.5748, + "grad_norm": 4.581247806549072, + "learning_rate": 1.9005000000000002e-05, + "num_tokens": 67548.0, + "mean_token_accuracy": 0.8999999761581421, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.6366, + "grad_norm": 1.6470707654953003, + "learning_rate": 1.9e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.4235, + "grad_norm": 0.9932326078414917, + "learning_rate": 1.8995e-05, + "num_tokens": 68572.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": 0.6296, + "grad_norm": 1.9582555294036865, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.5822, + "grad_norm": 1.569627046585083, + "learning_rate": 1.8985000000000002e-05, + "num_tokens": 69596.0, + "mean_token_accuracy": 0.8727984428405762, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.5748, + "grad_norm": 1.2322492599487305, + "learning_rate": 1.898e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8747553825378418, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.6398, + "grad_norm": 1.6496992111206055, + "learning_rate": 1.8975e-05, + "num_tokens": 70620.0, + "mean_token_accuracy": 0.8610567450523376, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.3614, + "grad_norm": 1.1484179496765137, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.6247, + "grad_norm": 2.376291275024414, + "learning_rate": 1.8965000000000002e-05, + "num_tokens": 71644.0, + "mean_token_accuracy": 0.8669275641441345, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.5296, + "grad_norm": 1.148452877998352, + "learning_rate": 1.896e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.3511, + "grad_norm": 1.6766430139541626, + "learning_rate": 1.8955e-05, + "num_tokens": 72668.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.5254, + "grad_norm": 13.195364952087402, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.5164, + "grad_norm": 10.336882591247559, + "learning_rate": 1.8945000000000002e-05, + "num_tokens": 72850.0, + "mean_token_accuracy": 0.9111111164093018, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.5768, + "grad_norm": 1.2533048391342163, + "learning_rate": 1.894e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.5941, + "grad_norm": 1.1360353231430054, + "learning_rate": 1.8935e-05, + "num_tokens": 73874.0, + "mean_token_accuracy": 0.8649706244468689, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.4831, + "grad_norm": 6.034897327423096, + "learning_rate": 1.893e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.4774, + "grad_norm": 5.36783504486084, + "learning_rate": 1.8925000000000003e-05, + "num_tokens": 74056.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.3472, + "grad_norm": 2.312915563583374, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.4547, + "grad_norm": 5.124778747558594, + "learning_rate": 1.8915e-05, + "num_tokens": 74659.0, + "mean_token_accuracy": 0.9222221970558167, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.4438, + "grad_norm": 3.7214717864990234, + "learning_rate": 1.891e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.5071, + "grad_norm": 1.825179100036621, + "learning_rate": 1.8905000000000003e-05, + "num_tokens": 75262.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.4157, + "grad_norm": 2.892442464828491, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.4085, + "grad_norm": 3.1406774520874023, + "learning_rate": 1.8895e-05, + "num_tokens": 75444.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.532, + "grad_norm": 2.529170274734497, + "learning_rate": 1.889e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.8845401406288147, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.3828, + "grad_norm": 3.846367597579956, + "learning_rate": 1.8885000000000003e-05, + "num_tokens": 76047.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.5073, + "grad_norm": 2.1968491077423096, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.888454020023346, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.5165, + "grad_norm": 1.508063793182373, + "learning_rate": 1.8875e-05, + "num_tokens": 77071.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.3491, + "grad_norm": 2.4780421257019043, + "learning_rate": 1.887e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.3379, + "grad_norm": 2.2446343898773193, + "learning_rate": 1.8865000000000003e-05, + "num_tokens": 77253.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.3318, + "grad_norm": 3.05029296875, + "learning_rate": 1.886e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.3173, + "grad_norm": 2.2870967388153076, + "learning_rate": 1.8855e-05, + "num_tokens": 77435.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.3278, + "grad_norm": 1.3750704526901245, + "learning_rate": 1.885e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.2964, + "grad_norm": 2.238151788711548, + "learning_rate": 1.8845000000000003e-05, + "num_tokens": 78038.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.305, + "grad_norm": 1.4246138334274292, + "learning_rate": 1.884e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.3385, + "grad_norm": 1.810808777809143, + "learning_rate": 1.8835000000000002e-05, + "num_tokens": 79062.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.5181, + "grad_norm": 2.939674139022827, + "learning_rate": 1.883e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.4909, + "grad_norm": 2.4543910026550293, + "learning_rate": 1.8825000000000004e-05, + "num_tokens": 80086.0, + "mean_token_accuracy": 0.8923678994178772, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.2604, + "grad_norm": 2.63846492767334, + "learning_rate": 1.882e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.2533, + "grad_norm": 3.536795139312744, + "learning_rate": 1.8815000000000002e-05, + "num_tokens": 80268.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.2449, + "grad_norm": 2.941943645477295, + "learning_rate": 1.881e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.4928, + "grad_norm": 2.69899582862854, + "learning_rate": 1.8805000000000004e-05, + "num_tokens": 80871.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.3019, + "grad_norm": 1.5328068733215332, + "learning_rate": 1.88e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.4154, + "grad_norm": 5.932051181793213, + "learning_rate": 1.8795000000000002e-05, + "num_tokens": 81895.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.4072, + "grad_norm": 3.7254579067230225, + "learning_rate": 1.879e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.9099804162979126, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.2266, + "grad_norm": 4.67811918258667, + "learning_rate": 1.8785e-05, + "num_tokens": 82498.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.2835, + "grad_norm": 2.31062650680542, + "learning_rate": 1.878e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.222, + "grad_norm": 4.9225335121154785, + "learning_rate": 1.8775000000000002e-05, + "num_tokens": 83101.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.4098, + "grad_norm": 2.3302409648895264, + "learning_rate": 1.877e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.9001957178115845, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.4401, + "grad_norm": 1.917952299118042, + "learning_rate": 1.8765e-05, + "num_tokens": 84125.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.3927, + "grad_norm": 4.312741279602051, + "learning_rate": 1.876e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.2032, + "grad_norm": 4.237610340118408, + "learning_rate": 1.8755000000000003e-05, + "num_tokens": 84728.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.2, + "grad_norm": 4.144465446472168, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.1974, + "grad_norm": 4.548800945281982, + "learning_rate": 1.8745e-05, + "num_tokens": 84910.0, + "mean_token_accuracy": 0.9555555582046509, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.2936, + "grad_norm": 1.368138313293457, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.4425, + "grad_norm": 1.6547119617462158, + "learning_rate": 1.8735e-05, + "num_tokens": 85934.0, + "mean_token_accuracy": 0.9021526575088501, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.1815, + "grad_norm": 1.936987042427063, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.3853, + "grad_norm": 1.9844653606414795, + "learning_rate": 1.8725e-05, + "num_tokens": 86537.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.3816, + "grad_norm": 2.563992977142334, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.1717, + "grad_norm": 1.9275789260864258, + "learning_rate": 1.8715e-05, + "num_tokens": 87140.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.3635, + "grad_norm": 2.198817014694214, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.166, + "grad_norm": 2.225175380706787, + "learning_rate": 1.8705e-05, + "num_tokens": 87743.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.1618, + "grad_norm": 1.4393062591552734, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.3188, + "grad_norm": 1.8201826810836792, + "learning_rate": 1.8695e-05, + "num_tokens": 88346.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.3957, + "grad_norm": 1.8483490943908691, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.9138942956924438, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.3545, + "grad_norm": 2.5658915042877197, + "learning_rate": 1.8685e-05, + "num_tokens": 89370.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.4109, + "grad_norm": 2.197061777114868, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.9041095972061157, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.3934, + "grad_norm": 1.9570775032043457, + "learning_rate": 1.8675e-05, + "num_tokens": 90394.0, + "mean_token_accuracy": 0.9119373559951782, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.149, + "grad_norm": 2.242249011993408, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.3673, + "grad_norm": 2.5640757083892822, + "learning_rate": 1.8665000000000002e-05, + "num_tokens": 90997.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.3437, + "grad_norm": 1.6239393949508667, + "learning_rate": 1.866e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.1448, + "grad_norm": 2.4205758571624756, + "learning_rate": 1.8655e-05, + "num_tokens": 91600.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.2803, + "grad_norm": 1.5447510480880737, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.2501, + "grad_norm": 1.2362499237060547, + "learning_rate": 1.8645000000000002e-05, + "num_tokens": 92624.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.263, + "grad_norm": 1.3345736265182495, + "learning_rate": 1.864e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.3598, + "grad_norm": 5.145051002502441, + "learning_rate": 1.8635e-05, + "num_tokens": 93648.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.143, + "grad_norm": 3.363790988922119, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.3858, + "grad_norm": 2.9212327003479004, + "learning_rate": 1.8625000000000002e-05, + "num_tokens": 94251.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.1404, + "grad_norm": 2.9169602394104004, + "learning_rate": 1.862e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.2422, + "grad_norm": 1.9243407249450684, + "learning_rate": 1.8615e-05, + "num_tokens": 94854.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.3585, + "grad_norm": 4.024987697601318, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.3474, + "grad_norm": 2.019094944000244, + "learning_rate": 1.8605000000000002e-05, + "num_tokens": 95878.0, + "mean_token_accuracy": 0.9158512949943542, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.3368, + "grad_norm": 1.5415781736373901, + "learning_rate": 1.86e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.1373, + "grad_norm": 3.6068742275238037, + "learning_rate": 1.8595e-05, + "num_tokens": 96481.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.2176, + "grad_norm": 1.1446317434310913, + "learning_rate": 1.859e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.1328, + "grad_norm": 3.26859974861145, + "learning_rate": 1.8585000000000002e-05, + "num_tokens": 97084.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.131, + "grad_norm": 2.849381446838379, + "learning_rate": 1.858e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.3323, + "grad_norm": 4.831865310668945, + "learning_rate": 1.8575e-05, + "num_tokens": 97687.0, + "mean_token_accuracy": 0.9178082346916199, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.3036, + "grad_norm": 1.8017945289611816, + "learning_rate": 1.857e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.3478, + "grad_norm": 4.759650707244873, + "learning_rate": 1.8565000000000003e-05, + "num_tokens": 98711.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.1239, + "grad_norm": 1.6707216501235962, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.3554, + "grad_norm": 3.568655014038086, + "learning_rate": 1.8555e-05, + "num_tokens": 99314.0, + "mean_token_accuracy": 0.9256359934806824, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.1219, + "grad_norm": 1.743139624595642, + "learning_rate": 1.855e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.3297, + "grad_norm": 3.192558526992798, + "learning_rate": 1.8545000000000003e-05, + "num_tokens": 99917.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.339, + "grad_norm": 2.8700854778289795, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.3341, + "grad_norm": 3.1597092151641846, + "learning_rate": 1.8535e-05, + "num_tokens": 100941.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.3151, + "grad_norm": 2.549912929534912, + "learning_rate": 1.853e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.249, + "grad_norm": 4.164290904998779, + "learning_rate": 1.8525000000000003e-05, + "num_tokens": 101965.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.2877, + "grad_norm": 1.8462411165237427, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.2215, + "grad_norm": 1.49083411693573, + "learning_rate": 1.8515e-05, + "num_tokens": 102989.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.2631, + "grad_norm": 1.5168116092681885, + "learning_rate": 1.851e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.3179, + "grad_norm": 3.1732399463653564, + "learning_rate": 1.8505000000000003e-05, + "num_tokens": 104013.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.315, + "grad_norm": 2.9725892543792725, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.9217221140861511, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.2763, + "grad_norm": 1.4138047695159912, + "learning_rate": 1.8495e-05, + "num_tokens": 105037.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.3151, + "grad_norm": 2.3229987621307373, + "learning_rate": 1.849e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.931506872177124, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.2862, + "grad_norm": 3.2318272590637207, + "learning_rate": 1.8485000000000003e-05, + "num_tokens": 106061.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.2339, + "grad_norm": 3.401787757873535, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.2094, + "grad_norm": 2.1061453819274902, + "learning_rate": 1.8475000000000002e-05, + "num_tokens": 107085.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.2863, + "grad_norm": 1.6479979753494263, + "learning_rate": 1.847e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.1445, + "grad_norm": 7.635932445526123, + "learning_rate": 1.8465e-05, + "num_tokens": 107688.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.1347, + "grad_norm": 6.305334091186523, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.2233, + "grad_norm": 3.41860294342041, + "learning_rate": 1.8455000000000002e-05, + "num_tokens": 108291.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.128, + "grad_norm": 5.801213264465332, + "learning_rate": 1.845e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.1283, + "grad_norm": 5.675178527832031, + "learning_rate": 1.8445e-05, + "num_tokens": 108473.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.3029, + "grad_norm": 5.509076118469238, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.1112, + "grad_norm": 2.6948108673095703, + "learning_rate": 1.8435000000000002e-05, + "num_tokens": 109076.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.107, + "grad_norm": 2.523871421813965, + "learning_rate": 1.843e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.2636, + "grad_norm": 2.1710612773895264, + "learning_rate": 1.8425e-05, + "num_tokens": 109679.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.2891, + "grad_norm": 2.2263383865356445, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.2611, + "grad_norm": 1.752862572669983, + "learning_rate": 1.8415e-05, + "num_tokens": 110703.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.1023, + "grad_norm": 3.256633996963501, + "learning_rate": 1.841e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.1009, + "grad_norm": 2.10860276222229, + "learning_rate": 1.8405e-05, + "num_tokens": 110885.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.2849, + "grad_norm": 3.3475303649902344, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.2727, + "grad_norm": 2.763415575027466, + "learning_rate": 1.8395e-05, + "num_tokens": 111909.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.1914, + "grad_norm": 1.7206056118011475, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.2981, + "grad_norm": 4.825778484344482, + "learning_rate": 1.8385e-05, + "num_tokens": 112933.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.2575, + "grad_norm": 2.3532052040100098, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.3108, + "grad_norm": 2.1766650676727295, + "learning_rate": 1.8375e-05, + "num_tokens": 113957.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.2547, + "grad_norm": 1.6271114349365234, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.2451, + "grad_norm": 1.533071517944336, + "learning_rate": 1.8365e-05, + "num_tokens": 114981.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.2362, + "grad_norm": 1.4881736040115356, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0898, + "grad_norm": 1.764446496963501, + "learning_rate": 1.8355e-05, + "num_tokens": 115584.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.2345, + "grad_norm": 1.3447750806808472, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.2802, + "grad_norm": 3.713470458984375, + "learning_rate": 1.8345e-05, + "num_tokens": 116608.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.1853, + "grad_norm": 1.427515983581543, + "learning_rate": 1.834e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0921, + "grad_norm": 2.3074567317962646, + "learning_rate": 1.8335e-05, + "num_tokens": 117211.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0887, + "grad_norm": 2.2687530517578125, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.2126, + "grad_norm": 3.1814491748809814, + "learning_rate": 1.8325e-05, + "num_tokens": 117814.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0881, + "grad_norm": 2.606569528579712, + "learning_rate": 1.832e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.1751, + "grad_norm": 2.4892592430114746, + "learning_rate": 1.8315e-05, + "num_tokens": 118417.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.2011, + "grad_norm": 2.357940673828125, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.2168, + "grad_norm": 2.8288958072662354, + "learning_rate": 1.8305000000000002e-05, + "num_tokens": 119441.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.183, + "grad_norm": 1.945565104484558, + "learning_rate": 1.83e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0746, + "grad_norm": 1.7267169952392578, + "learning_rate": 1.8295e-05, + "num_tokens": 120044.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0733, + "grad_norm": 1.9393048286437988, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0728, + "grad_norm": 2.1715469360351562, + "learning_rate": 1.8285000000000002e-05, + "num_tokens": 120226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0704, + "grad_norm": 2.0847175121307373, + "learning_rate": 1.828e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.1791, + "grad_norm": 1.5438156127929688, + "learning_rate": 1.8275e-05, + "num_tokens": 120829.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.2073, + "grad_norm": 1.6084765195846558, + "learning_rate": 1.827e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.2215, + "grad_norm": 1.543698787689209, + "learning_rate": 1.8265000000000002e-05, + "num_tokens": 121853.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.1904, + "grad_norm": 1.41824209690094, + "learning_rate": 1.826e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.2005, + "grad_norm": 1.6803160905838013, + "learning_rate": 1.8255e-05, + "num_tokens": 122877.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0605, + "grad_norm": 1.5710349082946777, + "learning_rate": 1.825e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0615, + "grad_norm": 1.633989691734314, + "learning_rate": 1.8245000000000002e-05, + "num_tokens": 123059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.1828, + "grad_norm": 1.6902644634246826, + "learning_rate": 1.824e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0558, + "grad_norm": 1.7157853841781616, + "learning_rate": 1.8235e-05, + "num_tokens": 123662.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0515, + "grad_norm": 1.4476577043533325, + "learning_rate": 1.823e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0502, + "grad_norm": 2.1938326358795166, + "learning_rate": 1.8225000000000003e-05, + "num_tokens": 123844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.1783, + "grad_norm": 2.738436460494995, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.275, + "grad_norm": 3.493831157684326, + "learning_rate": 1.8215e-05, + "num_tokens": 124868.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.1786, + "grad_norm": 1.7162284851074219, + "learning_rate": 1.821e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0448, + "grad_norm": 2.925360679626465, + "learning_rate": 1.8205000000000003e-05, + "num_tokens": 125471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.3138, + "grad_norm": 4.2967753410339355, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0381, + "grad_norm": 1.3151957988739014, + "learning_rate": 1.8195e-05, + "num_tokens": 126074.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.1773, + "grad_norm": 1.440629243850708, + "learning_rate": 1.819e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0361, + "grad_norm": 1.378117561340332, + "learning_rate": 1.8185000000000003e-05, + "num_tokens": 126677.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0357, + "grad_norm": 1.3120638132095337, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 1.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0333, + "grad_norm": 1.1625266075134277, + "learning_rate": 1.8175e-05, + "num_tokens": 126859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0292, + "grad_norm": 1.198464035987854, + "learning_rate": 1.817e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.193, + "grad_norm": 1.9310072660446167, + "learning_rate": 1.8165000000000003e-05, + "num_tokens": 127462.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": 0.209, + "grad_norm": 1.7112150192260742, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.1398, + "grad_norm": 1.4659478664398193, + "learning_rate": 1.8155e-05, + "num_tokens": 128486.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.1688, + "grad_norm": 3.3470299243927, + "learning_rate": 1.815e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.2416, + "grad_norm": 3.232045888900757, + "learning_rate": 1.8145e-05, + "num_tokens": 129510.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0242, + "grad_norm": 2.809112548828125, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 1.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0222, + "grad_norm": 2.652397394180298, + "learning_rate": 1.8135000000000002e-05, + "num_tokens": 129692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.1619, + "grad_norm": 1.6935186386108398, + "learning_rate": 1.813e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0206, + "grad_norm": 1.8048573732376099, + "learning_rate": 1.8125e-05, + "num_tokens": 130295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0199, + "grad_norm": 1.7344465255737305, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0174, + "grad_norm": 1.6794533729553223, + "learning_rate": 1.8115000000000002e-05, + "num_tokens": 130477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0172, + "grad_norm": 2.995704174041748, + "learning_rate": 1.811e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 1.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.241, + "grad_norm": 2.3058347702026367, + "learning_rate": 1.8105e-05, + "num_tokens": 131080.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.2068, + "grad_norm": 2.030050277709961, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.1573, + "grad_norm": 2.108264207839966, + "learning_rate": 1.8095000000000002e-05, + "num_tokens": 132104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0128, + "grad_norm": 0.9666662812232971, + "learning_rate": 1.809e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.1613, + "grad_norm": 1.9703510999679565, + "learning_rate": 1.8085e-05, + "num_tokens": 132707.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.1579, + "grad_norm": 1.7536500692367554, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.2503, + "grad_norm": 3.074944257736206, + "learning_rate": 1.8075000000000002e-05, + "num_tokens": 133731.0, + "mean_token_accuracy": 0.9354207515716553, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.154, + "grad_norm": 2.3541879653930664, + "learning_rate": 1.807e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.1655, + "grad_norm": 1.2853813171386719, + "learning_rate": 1.8065e-05, + "num_tokens": 134755.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.1481, + "grad_norm": 1.4534378051757812, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0156, + "grad_norm": 2.346766710281372, + "learning_rate": 1.8055000000000002e-05, + "num_tokens": 135358.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0179, + "grad_norm": 2.7506628036499023, + "learning_rate": 1.805e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 1.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.2665, + "grad_norm": 7.800353050231934, + "learning_rate": 1.8045e-05, + "num_tokens": 135961.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0137, + "grad_norm": 1.6062291860580444, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 1.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.1298, + "grad_norm": 1.9706884622573853, + "learning_rate": 1.8035000000000003e-05, + "num_tokens": 136564.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.1587, + "grad_norm": 4.288624286651611, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.1706, + "grad_norm": 2.351865291595459, + "learning_rate": 1.8025e-05, + "num_tokens": 137588.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.1391, + "grad_norm": 2.3107855319976807, + "learning_rate": 1.802e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0116, + "grad_norm": 1.2413067817687988, + "learning_rate": 1.8015000000000003e-05, + "num_tokens": 138191.0, + "mean_token_accuracy": 1.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.1528, + "grad_norm": 2.238205671310425, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0111, + "grad_norm": 1.0291837453842163, + "learning_rate": 1.8005e-05, + "num_tokens": 138794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": 0.2551, + "grad_norm": 3.0084855556488037, + "learning_rate": 1.8e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9393346309661865, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.271, + "grad_norm": 3.355750560760498, + "learning_rate": 1.7995000000000003e-05, + "num_tokens": 139818.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.1479, + "grad_norm": 3.3119289875030518, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.1951, + "grad_norm": 3.4890756607055664, + "learning_rate": 1.7985e-05, + "num_tokens": 140842.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.1439, + "grad_norm": 2.5274429321289062, + "learning_rate": 1.798e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.1537, + "grad_norm": 3.0909008979797363, + "learning_rate": 1.7975000000000003e-05, + "num_tokens": 141866.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0107, + "grad_norm": 2.0530686378479004, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 1.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.011, + "grad_norm": 1.7325184345245361, + "learning_rate": 1.7965e-05, + "num_tokens": 142048.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.135, + "grad_norm": 1.9106756448745728, + "learning_rate": 1.796e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.177, + "grad_norm": 3.206461191177368, + "learning_rate": 1.7955000000000003e-05, + "num_tokens": 143072.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0095, + "grad_norm": 0.8696625828742981, + "learning_rate": 1.795e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 1.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.1656, + "grad_norm": 5.9883856773376465, + "learning_rate": 1.7945000000000002e-05, + "num_tokens": 143675.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.2393, + "grad_norm": 3.601959466934204, + "learning_rate": 1.794e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0092, + "grad_norm": 1.547377586364746, + "learning_rate": 1.7935000000000004e-05, + "num_tokens": 144278.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0099, + "grad_norm": 1.7349345684051514, + "learning_rate": 1.793e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 1.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.1454, + "grad_norm": 2.134899377822876, + "learning_rate": 1.7925000000000002e-05, + "num_tokens": 144881.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.2317, + "grad_norm": 3.7199866771698, + "learning_rate": 1.792e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.2081, + "grad_norm": 3.7679033279418945, + "learning_rate": 1.7915000000000004e-05, + "num_tokens": 145905.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0084, + "grad_norm": 0.7981175184249878, + "learning_rate": 1.791e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 1.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0078, + "grad_norm": 0.624564528465271, + "learning_rate": 1.7905000000000002e-05, + "num_tokens": 146087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.153, + "grad_norm": 1.46378755569458, + "learning_rate": 1.79e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0085, + "grad_norm": 1.403277039527893, + "learning_rate": 1.7895000000000004e-05, + "num_tokens": 146690.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.1413, + "grad_norm": 2.821493148803711, + "learning_rate": 1.789e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.1268, + "grad_norm": 2.5567212104797363, + "learning_rate": 1.7885000000000002e-05, + "num_tokens": 147714.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.1303, + "grad_norm": 2.5823540687561035, + "learning_rate": 1.788e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0075, + "grad_norm": 1.26413094997406, + "learning_rate": 1.7875e-05, + "num_tokens": 148317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0067, + "grad_norm": 0.9559513330459595, + "learning_rate": 1.787e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0069, + "grad_norm": 0.641984224319458, + "learning_rate": 1.7865000000000003e-05, + "num_tokens": 148499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.1762, + "grad_norm": 2.6874637603759766, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0062, + "grad_norm": 0.4612693786621094, + "learning_rate": 1.7855e-05, + "num_tokens": 149102.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.1284, + "grad_norm": 2.1469764709472656, + "learning_rate": 1.785e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.1216, + "grad_norm": 2.77829909324646, + "learning_rate": 1.7845000000000003e-05, + "num_tokens": 150126.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0056, + "grad_norm": 0.3416956067085266, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 1.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0055, + "grad_norm": 0.3599971830844879, + "learning_rate": 1.7835e-05, + "num_tokens": 150308.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0054, + "grad_norm": 0.3336946368217468, + "learning_rate": 1.783e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 1.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.1384, + "grad_norm": 2.486008882522583, + "learning_rate": 1.7825e-05, + "num_tokens": 150911.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.1366, + "grad_norm": 1.806955337524414, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0053, + "grad_norm": 0.3250260651111603, + "learning_rate": 1.7815e-05, + "num_tokens": 151514.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0048, + "grad_norm": 0.33809739351272583, + "learning_rate": 1.781e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 1.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.1241, + "grad_norm": 1.514503002166748, + "learning_rate": 1.7805e-05, + "num_tokens": 152117.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.1369, + "grad_norm": 1.73817777633667, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.005, + "grad_norm": 0.6402959227561951, + "learning_rate": 1.7795e-05, + "num_tokens": 152720.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.1392, + "grad_norm": 2.1087169647216797, + "learning_rate": 1.779e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0056, + "grad_norm": 0.7931351661682129, + "learning_rate": 1.7785e-05, + "num_tokens": 153323.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.1216, + "grad_norm": 2.559343099594116, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.1415, + "grad_norm": 3.7847163677215576, + "learning_rate": 1.7775000000000002e-05, + "num_tokens": 154347.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0056, + "grad_norm": 0.6650505661964417, + "learning_rate": 1.777e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0058, + "grad_norm": 0.6711560487747192, + "learning_rate": 1.7765e-05, + "num_tokens": 154529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.1339, + "grad_norm": 2.383869171142578, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.1384, + "grad_norm": 2.9380829334259033, + "learning_rate": 1.7755000000000002e-05, + "num_tokens": 155553.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.1355, + "grad_norm": 3.530726432800293, + "learning_rate": 1.775e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0057, + "grad_norm": 0.6963756680488586, + "learning_rate": 1.7745e-05, + "num_tokens": 156156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0047, + "grad_norm": 0.45467251539230347, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.1322, + "grad_norm": 2.1101133823394775, + "learning_rate": 1.7735000000000002e-05, + "num_tokens": 156759.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.3436, + "grad_norm": 10.156854629516602, + "learning_rate": 1.773e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9236790537834167, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.1111, + "grad_norm": 1.9533101320266724, + "learning_rate": 1.7725e-05, + "num_tokens": 157783.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0054, + "grad_norm": 0.571807861328125, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 1.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0045, + "grad_norm": 0.6374226808547974, + "learning_rate": 1.7715000000000002e-05, + "num_tokens": 157965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.1115, + "grad_norm": 1.9669644832611084, + "learning_rate": 1.771e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.1336, + "grad_norm": 1.4811934232711792, + "learning_rate": 1.7705e-05, + "num_tokens": 158989.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.2041, + "grad_norm": 3.112797737121582, + "learning_rate": 1.77e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0045, + "grad_norm": 0.5766833424568176, + "learning_rate": 1.7695000000000003e-05, + "num_tokens": 159592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.1237, + "grad_norm": 1.863338589668274, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.1236, + "grad_norm": 2.4069719314575195, + "learning_rate": 1.7685e-05, + "num_tokens": 160616.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0055, + "grad_norm": 0.8338965177536011, + "learning_rate": 1.768e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 1.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0044, + "grad_norm": 0.5481887459754944, + "learning_rate": 1.7675000000000003e-05, + "num_tokens": 160798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.1354, + "grad_norm": 4.145319938659668, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.1279, + "grad_norm": 3.560887575149536, + "learning_rate": 1.7665e-05, + "num_tokens": 161822.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0044, + "grad_norm": 0.43582797050476074, + "learning_rate": 1.766e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 1.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.004, + "grad_norm": 0.3212014138698578, + "learning_rate": 1.7655000000000003e-05, + "num_tokens": 162004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.1956, + "grad_norm": 2.662240982055664, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0038, + "grad_norm": 0.32649490237236023, + "learning_rate": 1.7645e-05, + "num_tokens": 162607.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0039, + "grad_norm": 0.33435314893722534, + "learning_rate": 1.764e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": 0.1217, + "grad_norm": 3.422117233276367, + "learning_rate": 1.7635000000000003e-05, + "num_tokens": 163210.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.1169, + "grad_norm": 1.9841532707214355, + "learning_rate": 1.763e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0035, + "grad_norm": 0.23611226677894592, + "learning_rate": 1.7625e-05, + "num_tokens": 163813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0036, + "grad_norm": 0.35102367401123047, + "learning_rate": 1.762e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 1.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0034, + "grad_norm": 0.22219745814800262, + "learning_rate": 1.7615000000000003e-05, + "num_tokens": 163995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.1109, + "grad_norm": 1.8000237941741943, + "learning_rate": 1.761e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0034, + "grad_norm": 0.4621182084083557, + "learning_rate": 1.7605000000000002e-05, + "num_tokens": 164598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0035, + "grad_norm": 0.5149714350700378, + "learning_rate": 1.76e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.004, + "grad_norm": 0.5277268886566162, + "learning_rate": 1.7595000000000003e-05, + "num_tokens": 164780.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.1178, + "grad_norm": 1.9578617811203003, + "learning_rate": 1.759e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0032, + "grad_norm": 0.30999821424484253, + "learning_rate": 1.7585000000000002e-05, + "num_tokens": 165383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0032, + "grad_norm": 0.3227098882198334, + "learning_rate": 1.758e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 1.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0032, + "grad_norm": 0.2970958352088928, + "learning_rate": 1.7575000000000004e-05, + "num_tokens": 165565.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.1054, + "grad_norm": 3.3750076293945312, + "learning_rate": 1.757e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.003, + "grad_norm": 0.315746933221817, + "learning_rate": 1.7565000000000002e-05, + "num_tokens": 166168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.1014, + "grad_norm": 1.7110451459884644, + "learning_rate": 1.756e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.1009, + "grad_norm": 2.0282938480377197, + "learning_rate": 1.7555e-05, + "num_tokens": 167192.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0029, + "grad_norm": 0.18862634897232056, + "learning_rate": 1.755e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 1.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.1251, + "grad_norm": 1.5325688123703003, + "learning_rate": 1.7545000000000002e-05, + "num_tokens": 167795.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0032, + "grad_norm": 0.37112897634506226, + "learning_rate": 1.754e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 1.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0031, + "grad_norm": 0.32201266288757324, + "learning_rate": 1.7535e-05, + "num_tokens": 167977.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.003, + "grad_norm": 0.32648831605911255, + "learning_rate": 1.753e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 1.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": 0.1251, + "grad_norm": 2.044515371322632, + "learning_rate": 1.7525000000000002e-05, + "num_tokens": 168580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.1099, + "grad_norm": 2.5852344036102295, + "learning_rate": 1.752e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0032, + "grad_norm": 0.33884692192077637, + "learning_rate": 1.7515e-05, + "num_tokens": 169183.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.1006, + "grad_norm": 1.9987916946411133, + "learning_rate": 1.751e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0895, + "grad_norm": 2.697984457015991, + "learning_rate": 1.7505e-05, + "num_tokens": 170207.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0034, + "grad_norm": 0.4763769507408142, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 1.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0899, + "grad_norm": 3.0565173625946045, + "learning_rate": 1.7495e-05, + "num_tokens": 170810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0909, + "grad_norm": 1.3817325830459595, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0044, + "grad_norm": 0.8519660830497742, + "learning_rate": 1.7485e-05, + "num_tokens": 171413.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.1095, + "grad_norm": 2.0203707218170166, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0048, + "grad_norm": 1.1067970991134644, + "learning_rate": 1.7475e-05, + "num_tokens": 172016.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.1167, + "grad_norm": 2.3915855884552, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0049, + "grad_norm": 1.0700874328613281, + "learning_rate": 1.7465e-05, + "num_tokens": 172619.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.004, + "grad_norm": 0.6739718317985535, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 1.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.1176, + "grad_norm": 2.5957095623016357, + "learning_rate": 1.7455e-05, + "num_tokens": 173222.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": 0.0763, + "grad_norm": 2.0077261924743652, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0028, + "grad_norm": 0.2505457103252411, + "learning_rate": 1.7445e-05, + "num_tokens": 173825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0025, + "grad_norm": 0.1596791297197342, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 1.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.1892, + "grad_norm": 2.4415338039398193, + "learning_rate": 1.7435e-05, + "num_tokens": 174428.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.1134, + "grad_norm": 2.0744497776031494, + "learning_rate": 1.743e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0991, + "grad_norm": 2.4540417194366455, + "learning_rate": 1.7425e-05, + "num_tokens": 175452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0025, + "grad_norm": 0.17656919360160828, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.1227, + "grad_norm": 2.1174721717834473, + "learning_rate": 1.7415000000000002e-05, + "num_tokens": 176055.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0026, + "grad_norm": 0.23843693733215332, + "learning_rate": 1.741e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 1.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.1103, + "grad_norm": 3.4821200370788574, + "learning_rate": 1.7405e-05, + "num_tokens": 176658.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0027, + "grad_norm": 0.3274306654930115, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 1.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0924, + "grad_norm": 1.685363531112671, + "learning_rate": 1.7395000000000002e-05, + "num_tokens": 177261.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0028, + "grad_norm": 0.3265073299407959, + "learning_rate": 1.739e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 1.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.1099, + "grad_norm": 3.1508426666259766, + "learning_rate": 1.7385e-05, + "num_tokens": 177864.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.1034, + "grad_norm": 1.8193601369857788, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.1016, + "grad_norm": 1.59476637840271, + "learning_rate": 1.7375000000000002e-05, + "num_tokens": 178888.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.1998, + "grad_norm": 3.547844648361206, + "learning_rate": 1.737e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.004, + "grad_norm": 0.7272564172744751, + "learning_rate": 1.7365e-05, + "num_tokens": 179491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0046, + "grad_norm": 0.918525755405426, + "learning_rate": 1.736e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 1.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.1078, + "grad_norm": 2.3493764400482178, + "learning_rate": 1.7355000000000002e-05, + "num_tokens": 180094.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0042, + "grad_norm": 0.7224324941635132, + "learning_rate": 1.735e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 1.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0043, + "grad_norm": 0.6705859303474426, + "learning_rate": 1.7345e-05, + "num_tokens": 180276.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.1953, + "grad_norm": 2.93843674659729, + "learning_rate": 1.734e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0034, + "grad_norm": 0.46903571486473083, + "learning_rate": 1.7335000000000003e-05, + "num_tokens": 180879.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0938, + "grad_norm": 2.1053452491760254, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0026, + "grad_norm": 0.24292589724063873, + "learning_rate": 1.7325e-05, + "num_tokens": 181482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0922, + "grad_norm": 2.257225275039673, + "learning_rate": 1.732e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.003, + "grad_norm": 0.4069388508796692, + "learning_rate": 1.7315000000000003e-05, + "num_tokens": 182085.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.183, + "grad_norm": 3.2919442653656006, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.1693, + "grad_norm": 2.224686861038208, + "learning_rate": 1.7305e-05, + "num_tokens": 183109.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.1085, + "grad_norm": 1.8910117149353027, + "learning_rate": 1.73e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0026, + "grad_norm": 0.40661975741386414, + "learning_rate": 1.7295000000000003e-05, + "num_tokens": 183712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0027, + "grad_norm": 0.4873325228691101, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 1.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0028, + "grad_norm": 0.6161079406738281, + "learning_rate": 1.7285e-05, + "num_tokens": 183894.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0027, + "grad_norm": 0.4630989134311676, + "learning_rate": 1.728e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 1.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0985, + "grad_norm": 1.9053902626037598, + "learning_rate": 1.7275000000000003e-05, + "num_tokens": 184497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0026, + "grad_norm": 0.37032097578048706, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 1.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0024, + "grad_norm": 0.27917778491973877, + "learning_rate": 1.7265e-05, + "num_tokens": 184679.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0826, + "grad_norm": 2.2242591381073, + "learning_rate": 1.726e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0023, + "grad_norm": 0.22320418059825897, + "learning_rate": 1.7255000000000003e-05, + "num_tokens": 185282.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0958, + "grad_norm": 2.1955316066741943, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.1204, + "grad_norm": 2.8383123874664307, + "learning_rate": 1.7245000000000002e-05, + "num_tokens": 186306.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0025, + "grad_norm": 0.2997134327888489, + "learning_rate": 1.724e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0024, + "grad_norm": 0.24415498971939087, + "learning_rate": 1.7235e-05, + "num_tokens": 186488.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0885, + "grad_norm": 2.02583384513855, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0927, + "grad_norm": 2.139193534851074, + "learning_rate": 1.7225000000000002e-05, + "num_tokens": 187512.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0025, + "grad_norm": 0.3212721347808838, + "learning_rate": 1.722e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.1594, + "grad_norm": 1.6018428802490234, + "learning_rate": 1.7215e-05, + "num_tokens": 188115.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0027, + "grad_norm": 0.43617552518844604, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 1.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.1228, + "grad_norm": 1.8676470518112183, + "learning_rate": 1.7205000000000002e-05, + "num_tokens": 188718.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": 0.1523, + "grad_norm": 2.5800390243530273, + "learning_rate": 1.72e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0036, + "grad_norm": 0.7294099926948547, + "learning_rate": 1.7195e-05, + "num_tokens": 189321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0797, + "grad_norm": 2.594087600708008, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.1031, + "grad_norm": 3.2291526794433594, + "learning_rate": 1.7185e-05, + "num_tokens": 190345.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0036, + "grad_norm": 0.7465726733207703, + "learning_rate": 1.718e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 1.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.1692, + "grad_norm": 2.709357500076294, + "learning_rate": 1.7175e-05, + "num_tokens": 190948.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.1003, + "grad_norm": 2.117990493774414, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.1015, + "grad_norm": 2.4742591381073, + "learning_rate": 1.7165e-05, + "num_tokens": 191972.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0911, + "grad_norm": 2.098302125930786, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.1107, + "grad_norm": 1.915540337562561, + "learning_rate": 1.7155e-05, + "num_tokens": 192996.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0049, + "grad_norm": 1.0682960748672485, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 1.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0965, + "grad_norm": 1.5651695728302002, + "learning_rate": 1.7145e-05, + "num_tokens": 193599.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.103, + "grad_norm": 2.3110480308532715, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.005, + "grad_norm": 1.1688706874847412, + "learning_rate": 1.7135e-05, + "num_tokens": 194202.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0801, + "grad_norm": 2.4091689586639404, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.159, + "grad_norm": 2.0551347732543945, + "learning_rate": 1.7125e-05, + "num_tokens": 195226.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.004, + "grad_norm": 0.8690920472145081, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 1.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0052, + "grad_norm": 1.225834608078003, + "learning_rate": 1.7115e-05, + "num_tokens": 195408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0038, + "grad_norm": 0.7105492949485779, + "learning_rate": 1.711e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 1.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0027, + "grad_norm": 0.3135615587234497, + "learning_rate": 1.7105e-05, + "num_tokens": 195590.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0025, + "grad_norm": 0.33731189370155334, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 1.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0024, + "grad_norm": 0.6950210928916931, + "learning_rate": 1.7095e-05, + "num_tokens": 195772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.166, + "grad_norm": 3.7873523235321045, + "learning_rate": 1.709e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.081, + "grad_norm": 2.6900861263275146, + "learning_rate": 1.7085e-05, + "num_tokens": 196796.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.002, + "grad_norm": 0.19354696571826935, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 1.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0935, + "grad_norm": 2.4997594356536865, + "learning_rate": 1.7075e-05, + "num_tokens": 197399.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.002, + "grad_norm": 0.24508339166641235, + "learning_rate": 1.707e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 1.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0019, + "grad_norm": 0.1790609359741211, + "learning_rate": 1.7065e-05, + "num_tokens": 197581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.1101, + "grad_norm": 2.382162570953369, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.1892, + "grad_norm": 3.0123023986816406, + "learning_rate": 1.7055000000000002e-05, + "num_tokens": 198605.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0019, + "grad_norm": 0.27882760763168335, + "learning_rate": 1.705e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0019, + "grad_norm": 0.23136040568351746, + "learning_rate": 1.7045e-05, + "num_tokens": 198787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.1046, + "grad_norm": 1.8799446821212769, + "learning_rate": 1.704e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0018, + "grad_norm": 0.23780478537082672, + "learning_rate": 1.7035000000000002e-05, + "num_tokens": 199390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0849, + "grad_norm": 1.9498792886734009, + "learning_rate": 1.703e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.298, + "step": 596 + }, + { + "loss": 0.0953, + "grad_norm": 2.2400667667388916, + "learning_rate": 1.7025e-05, + "num_tokens": 200414.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.002, + "grad_norm": 0.3908434510231018, + "learning_rate": 1.702e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 1.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0939, + "grad_norm": 2.667379140853882, + "learning_rate": 1.7015000000000002e-05, + "num_tokens": 201017.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0745, + "grad_norm": 2.066331624984741, + "learning_rate": 1.701e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0025, + "grad_norm": 0.5688944458961487, + "learning_rate": 1.7005e-05, + "num_tokens": 201620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.1069, + "grad_norm": 2.021451950073242, + "learning_rate": 1.7e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.003, + "grad_norm": 0.6418687105178833, + "learning_rate": 1.6995000000000002e-05, + "num_tokens": 202223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0029, + "grad_norm": 0.6194710731506348, + "learning_rate": 1.699e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 1.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.1193, + "grad_norm": 3.001216411590576, + "learning_rate": 1.6985e-05, + "num_tokens": 202826.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": 0.1078, + "grad_norm": 2.1146023273468018, + "learning_rate": 1.698e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.098, + "grad_norm": 3.064103841781616, + "learning_rate": 1.6975000000000003e-05, + "num_tokens": 203850.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0747, + "grad_norm": 3.1524202823638916, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.1506, + "grad_norm": 3.1213419437408447, + "learning_rate": 1.6965e-05, + "num_tokens": 204874.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0038, + "grad_norm": 0.8761835098266602, + "learning_rate": 1.696e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0733, + "grad_norm": 2.0461108684539795, + "learning_rate": 1.6955000000000003e-05, + "num_tokens": 205477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0948, + "grad_norm": 2.52803111076355, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0036, + "grad_norm": 0.837294340133667, + "learning_rate": 1.6945e-05, + "num_tokens": 206080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0036, + "grad_norm": 0.8330880403518677, + "learning_rate": 1.694e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0608, + "grad_norm": 1.6941643953323364, + "learning_rate": 1.6935000000000003e-05, + "num_tokens": 206683.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0897, + "grad_norm": 1.850446105003357, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0933, + "grad_norm": 2.3541157245635986, + "learning_rate": 1.6925e-05, + "num_tokens": 207707.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0026, + "grad_norm": 0.45243605971336365, + "learning_rate": 1.692e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 1.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0694, + "grad_norm": 2.299668312072754, + "learning_rate": 1.6915e-05, + "num_tokens": 208310.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0029, + "grad_norm": 0.6032459139823914, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0967, + "grad_norm": 2.7924766540527344, + "learning_rate": 1.6905e-05, + "num_tokens": 208913.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0027, + "grad_norm": 0.5459297299385071, + "learning_rate": 1.69e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 1.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0033, + "grad_norm": 0.7005264759063721, + "learning_rate": 1.6895e-05, + "num_tokens": 209095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0712, + "grad_norm": 2.0087270736694336, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0816, + "grad_norm": 2.023620843887329, + "learning_rate": 1.6885000000000002e-05, + "num_tokens": 210119.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0884, + "grad_norm": 3.3579723834991455, + "learning_rate": 1.688e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.1001, + "grad_norm": 2.1446380615234375, + "learning_rate": 1.6875e-05, + "num_tokens": 211143.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0024, + "grad_norm": 0.46906810998916626, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.003, + "grad_norm": 0.6180875897407532, + "learning_rate": 1.6865000000000002e-05, + "num_tokens": 211325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0024, + "grad_norm": 0.44018203020095825, + "learning_rate": 1.686e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0021, + "grad_norm": 0.3610388934612274, + "learning_rate": 1.6855e-05, + "num_tokens": 211507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0025, + "grad_norm": 0.42492103576660156, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0785, + "grad_norm": 2.052070379257202, + "learning_rate": 1.6845000000000002e-05, + "num_tokens": 212110.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0955, + "grad_norm": 1.5501021146774292, + "learning_rate": 1.684e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0017, + "grad_norm": 0.14774425327777863, + "learning_rate": 1.6835e-05, + "num_tokens": 212713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0016, + "grad_norm": 0.13003599643707275, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0016, + "grad_norm": 0.11263933777809143, + "learning_rate": 1.6825000000000002e-05, + "num_tokens": 212895.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0814, + "grad_norm": 2.4652907848358154, + "learning_rate": 1.682e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0016, + "grad_norm": 0.1284048706293106, + "learning_rate": 1.6815e-05, + "num_tokens": 213498.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0016, + "grad_norm": 0.14626798033714294, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 1.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0984, + "grad_norm": 2.53958797454834, + "learning_rate": 1.6805000000000003e-05, + "num_tokens": 214101.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0983, + "grad_norm": 2.0881552696228027, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0016, + "grad_norm": 0.14537213742733002, + "learning_rate": 1.6795e-05, + "num_tokens": 214704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0642, + "grad_norm": 2.0831480026245117, + "learning_rate": 1.679e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0016, + "grad_norm": 0.12770842015743256, + "learning_rate": 1.6785000000000003e-05, + "num_tokens": 215307.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0878, + "grad_norm": 2.531637668609619, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0864, + "grad_norm": 2.4697654247283936, + "learning_rate": 1.6775e-05, + "num_tokens": 216331.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0865, + "grad_norm": 1.655576229095459, + "learning_rate": 1.677e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.1086, + "grad_norm": 2.826423168182373, + "learning_rate": 1.6765000000000003e-05, + "num_tokens": 217355.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.1042, + "grad_norm": 3.4096198081970215, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0027, + "grad_norm": 0.5534147620201111, + "learning_rate": 1.6755e-05, + "num_tokens": 217958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0611, + "grad_norm": 1.5646562576293945, + "learning_rate": 1.675e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0033, + "grad_norm": 1.048545479774475, + "learning_rate": 1.6745000000000003e-05, + "num_tokens": 218561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.005, + "grad_norm": 1.3414465188980103, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0032, + "grad_norm": 0.636330246925354, + "learning_rate": 1.6735e-05, + "num_tokens": 218743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0882, + "grad_norm": 1.7900675535202026, + "learning_rate": 1.673e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0883, + "grad_norm": 1.8037763833999634, + "learning_rate": 1.6725000000000003e-05, + "num_tokens": 219767.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0733, + "grad_norm": 1.7987661361694336, + "learning_rate": 1.672e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0033, + "grad_norm": 0.6671841740608215, + "learning_rate": 1.6715000000000002e-05, + "num_tokens": 220370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": 0.0699, + "grad_norm": 2.178269147872925, + "learning_rate": 1.671e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0653, + "grad_norm": 2.165506601333618, + "learning_rate": 1.6705000000000004e-05, + "num_tokens": 221394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0828, + "grad_norm": 1.837323546409607, + "learning_rate": 1.67e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0798, + "grad_norm": 2.296050548553467, + "learning_rate": 1.6695000000000002e-05, + "num_tokens": 222418.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.169, + "grad_norm": 3.554818868637085, + "learning_rate": 1.669e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.1585, + "grad_norm": 2.993666887283325, + "learning_rate": 1.6685000000000004e-05, + "num_tokens": 223442.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0864, + "grad_norm": 3.0106112957000732, + "learning_rate": 1.668e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0605, + "grad_norm": 1.362823247909546, + "learning_rate": 1.6675000000000002e-05, + "num_tokens": 224466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0055, + "grad_norm": 1.2802313566207886, + "learning_rate": 1.667e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0906, + "grad_norm": 2.1969728469848633, + "learning_rate": 1.6665000000000004e-05, + "num_tokens": 225069.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0919, + "grad_norm": 3.0707828998565674, + "learning_rate": 1.666e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0061, + "grad_norm": 1.514074444770813, + "learning_rate": 1.6655000000000002e-05, + "num_tokens": 225672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0968, + "grad_norm": 2.7561936378479004, + "learning_rate": 1.665e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0887, + "grad_norm": 2.4263193607330322, + "learning_rate": 1.6645e-05, + "num_tokens": 226696.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0924, + "grad_norm": 2.360464572906494, + "learning_rate": 1.664e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0926, + "grad_norm": 2.564941644668579, + "learning_rate": 1.6635000000000003e-05, + "num_tokens": 227720.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0849, + "grad_norm": 3.0359439849853516, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.1488, + "grad_norm": 2.505728006362915, + "learning_rate": 1.6625e-05, + "num_tokens": 228744.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0757, + "grad_norm": 1.8170560598373413, + "learning_rate": 1.662e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0083, + "grad_norm": 2.0260066986083984, + "learning_rate": 1.6615000000000003e-05, + "num_tokens": 229347.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0088, + "grad_norm": 2.0579655170440674, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 1.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0698, + "grad_norm": 2.465139865875244, + "learning_rate": 1.6605e-05, + "num_tokens": 229950.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0865, + "grad_norm": 2.2099132537841797, + "learning_rate": 1.66e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0585, + "grad_norm": 2.1250336170196533, + "learning_rate": 1.6595e-05, + "num_tokens": 230974.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0047, + "grad_norm": 1.0128132104873657, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 1.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0934, + "grad_norm": 2.2283778190612793, + "learning_rate": 1.6585e-05, + "num_tokens": 231577.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0616, + "grad_norm": 1.5224443674087524, + "learning_rate": 1.658e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0831, + "grad_norm": 2.9646942615509033, + "learning_rate": 1.6575e-05, + "num_tokens": 232601.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.1237, + "grad_norm": 2.9797046184539795, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0636, + "grad_norm": 2.184934139251709, + "learning_rate": 1.6565e-05, + "num_tokens": 233625.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0624, + "grad_norm": 2.1586413383483887, + "learning_rate": 1.656e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.004, + "grad_norm": 0.7300480604171753, + "learning_rate": 1.6555e-05, + "num_tokens": 234228.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0034, + "grad_norm": 0.6544972062110901, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 1.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0696, + "grad_norm": 2.013485908508301, + "learning_rate": 1.6545e-05, + "num_tokens": 234831.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0029, + "grad_norm": 0.5221191048622131, + "learning_rate": 1.654e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 1.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0772, + "grad_norm": 1.8417952060699463, + "learning_rate": 1.6535e-05, + "num_tokens": 235434.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0713, + "grad_norm": 1.9944443702697754, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0658, + "grad_norm": 1.900722861289978, + "learning_rate": 1.6525000000000002e-05, + "num_tokens": 236458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0679, + "grad_norm": 2.4299168586730957, + "learning_rate": 1.652e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.06, + "grad_norm": 1.561680793762207, + "learning_rate": 1.6515e-05, + "num_tokens": 237482.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0739, + "grad_norm": 1.774482011795044, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0436, + "grad_norm": 1.7762006521224976, + "learning_rate": 1.6505000000000002e-05, + "num_tokens": 238506.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0034, + "grad_norm": 0.7131043672561646, + "learning_rate": 1.65e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0823, + "grad_norm": 2.994682550430298, + "learning_rate": 1.6495e-05, + "num_tokens": 239109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": 0.0776, + "grad_norm": 2.6362464427948, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0846, + "grad_norm": 2.8052642345428467, + "learning_rate": 1.6485000000000002e-05, + "num_tokens": 240133.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0048, + "grad_norm": 1.1239407062530518, + "learning_rate": 1.648e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 1.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0841, + "grad_norm": 2.1707019805908203, + "learning_rate": 1.6475e-05, + "num_tokens": 240736.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0423, + "grad_norm": 1.9918863773345947, + "learning_rate": 1.647e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0903, + "grad_norm": 2.1334235668182373, + "learning_rate": 1.6465000000000002e-05, + "num_tokens": 241760.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0067, + "grad_norm": 1.6682239770889282, + "learning_rate": 1.646e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 1.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0652, + "grad_norm": 1.4505804777145386, + "learning_rate": 1.6455e-05, + "num_tokens": 242363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0769, + "grad_norm": 1.6511123180389404, + "learning_rate": 1.645e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.006, + "grad_norm": 1.3824306726455688, + "learning_rate": 1.6445000000000003e-05, + "num_tokens": 242966.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0742, + "grad_norm": 2.109647512435913, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.1414, + "grad_norm": 2.5469703674316406, + "learning_rate": 1.6435e-05, + "num_tokens": 243990.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0669, + "grad_norm": 1.3465361595153809, + "learning_rate": 1.643e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.082, + "grad_norm": 2.1633052825927734, + "learning_rate": 1.6425000000000003e-05, + "num_tokens": 245014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0043, + "grad_norm": 0.926991879940033, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.143, + "grad_norm": 2.2284176349639893, + "learning_rate": 1.6415e-05, + "num_tokens": 245617.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0981, + "grad_norm": 2.301908493041992, + "learning_rate": 1.641e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0448, + "grad_norm": 1.2258681058883667, + "learning_rate": 1.6405000000000003e-05, + "num_tokens": 246641.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0043, + "grad_norm": 0.9370044469833374, + "learning_rate": 1.64e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 1.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0788, + "grad_norm": 3.762192964553833, + "learning_rate": 1.6395e-05, + "num_tokens": 247244.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0046, + "grad_norm": 0.9186903238296509, + "learning_rate": 1.639e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 1.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0035, + "grad_norm": 0.6930652260780334, + "learning_rate": 1.6385000000000003e-05, + "num_tokens": 247426.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.3322, + "grad_norm": 9.659932136535645, + "learning_rate": 1.638e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.927592933177948, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0662, + "grad_norm": 1.7305420637130737, + "learning_rate": 1.6375e-05, + "num_tokens": 248450.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0024, + "grad_norm": 0.3103489577770233, + "learning_rate": 1.637e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 1.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0915, + "grad_norm": 2.235250234603882, + "learning_rate": 1.6365000000000003e-05, + "num_tokens": 249053.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0596, + "grad_norm": 2.24996280670166, + "learning_rate": 1.636e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0679, + "grad_norm": 2.596879005432129, + "learning_rate": 1.6355000000000002e-05, + "num_tokens": 250077.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0642, + "grad_norm": 1.9771475791931152, + "learning_rate": 1.635e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0024, + "grad_norm": 0.7699919939041138, + "learning_rate": 1.6345000000000004e-05, + "num_tokens": 250680.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792540490627289, + "learning_rate": 1.634e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0021, + "grad_norm": 0.32606813311576843, + "learning_rate": 1.6335000000000002e-05, + "num_tokens": 250862.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0574, + "grad_norm": 2.3009800910949707, + "learning_rate": 1.633e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0966, + "grad_norm": 2.396700859069824, + "learning_rate": 1.6325e-05, + "num_tokens": 251886.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.1378, + "grad_norm": 2.726357936859131, + "learning_rate": 1.632e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0022, + "grad_norm": 0.36913836002349854, + "learning_rate": 1.6315000000000002e-05, + "num_tokens": 252489.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0021, + "grad_norm": 0.34592556953430176, + "learning_rate": 1.631e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0024, + "grad_norm": 0.45417988300323486, + "learning_rate": 1.6305e-05, + "num_tokens": 252671.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0672, + "grad_norm": 2.153691053390503, + "learning_rate": 1.63e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0021, + "grad_norm": 0.35626691579818726, + "learning_rate": 1.6295000000000002e-05, + "num_tokens": 253274.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0021, + "grad_norm": 0.37343284487724304, + "learning_rate": 1.629e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.002, + "grad_norm": 0.34979110956192017, + "learning_rate": 1.6285e-05, + "num_tokens": 253456.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.078, + "grad_norm": 2.1453590393066406, + "learning_rate": 1.628e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0019, + "grad_norm": 0.21562984585762024, + "learning_rate": 1.6275e-05, + "num_tokens": 254059.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0017, + "grad_norm": 0.18868863582611084, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.075, + "grad_norm": 2.238870143890381, + "learning_rate": 1.6265e-05, + "num_tokens": 254662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0712, + "grad_norm": 1.3297274112701416, + "learning_rate": 1.626e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.2668, + "grad_norm": 6.078666687011719, + "learning_rate": 1.6255e-05, + "num_tokens": 255686.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0017, + "grad_norm": 0.18387450277805328, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 1.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0017, + "grad_norm": 0.1908990740776062, + "learning_rate": 1.6245e-05, + "num_tokens": 255868.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0796, + "grad_norm": 1.9942879676818848, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0017, + "grad_norm": 0.18278343975543976, + "learning_rate": 1.6235e-05, + "num_tokens": 256471.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0017, + "grad_norm": 0.2012937068939209, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0019, + "grad_norm": 0.23027914762496948, + "learning_rate": 1.6225e-05, + "num_tokens": 256653.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.088, + "grad_norm": 2.3463082313537598, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0015, + "grad_norm": 0.1516222059726715, + "learning_rate": 1.6215e-05, + "num_tokens": 257256.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0592, + "grad_norm": 1.780516505241394, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0016, + "grad_norm": 0.1569552719593048, + "learning_rate": 1.6205e-05, + "num_tokens": 257859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0015, + "grad_norm": 0.15376536548137665, + "learning_rate": 1.62e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0016, + "grad_norm": 0.16803313791751862, + "learning_rate": 1.6195e-05, + "num_tokens": 258041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0877, + "grad_norm": 1.7319484949111938, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0015, + "grad_norm": 0.14868228137493134, + "learning_rate": 1.6185000000000002e-05, + "num_tokens": 258644.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0703, + "grad_norm": 1.626076102256775, + "learning_rate": 1.618e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0852, + "grad_norm": 1.4952802658081055, + "learning_rate": 1.6175e-05, + "num_tokens": 259668.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0782, + "grad_norm": 1.6785380840301514, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0806, + "grad_norm": 1.424209475517273, + "learning_rate": 1.6165000000000002e-05, + "num_tokens": 260692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0018, + "grad_norm": 0.27588197588920593, + "learning_rate": 1.616e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 1.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0682, + "grad_norm": 2.780993938446045, + "learning_rate": 1.6155e-05, + "num_tokens": 261295.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0027, + "grad_norm": 0.5201116800308228, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 1.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0028, + "grad_norm": 0.5331841111183167, + "learning_rate": 1.6145000000000002e-05, + "num_tokens": 261477.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.1404, + "grad_norm": 3.156398296356201, + "learning_rate": 1.614e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.003, + "grad_norm": 0.5515365600585938, + "learning_rate": 1.6135e-05, + "num_tokens": 262080.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0029, + "grad_norm": 0.5499039888381958, + "learning_rate": 1.613e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 1.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0473, + "grad_norm": 1.4062751531600952, + "learning_rate": 1.6125000000000002e-05, + "num_tokens": 262683.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0638, + "grad_norm": 1.5207608938217163, + "learning_rate": 1.612e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0018, + "grad_norm": 0.24566565454006195, + "learning_rate": 1.6115e-05, + "num_tokens": 263286.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0019, + "grad_norm": 0.26229217648506165, + "learning_rate": 1.611e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 1.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0019, + "grad_norm": 0.2518826425075531, + "learning_rate": 1.6105000000000003e-05, + "num_tokens": 263468.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.066, + "grad_norm": 1.8491489887237549, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0771, + "grad_norm": 2.3547780513763428, + "learning_rate": 1.6095e-05, + "num_tokens": 264492.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": 0.067, + "grad_norm": 1.581396222114563, + "learning_rate": 1.609e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0017, + "grad_norm": 0.22524242103099823, + "learning_rate": 1.6085000000000003e-05, + "num_tokens": 265095.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0825, + "grad_norm": 1.542362928390503, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0019, + "grad_norm": 0.2753300964832306, + "learning_rate": 1.6075e-05, + "num_tokens": 265698.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0594, + "grad_norm": 2.435917377471924, + "learning_rate": 1.607e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0671, + "grad_norm": 1.3892773389816284, + "learning_rate": 1.6065000000000003e-05, + "num_tokens": 266722.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0552, + "grad_norm": 1.9706708192825317, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0029, + "grad_norm": 0.5541112422943115, + "learning_rate": 1.6055e-05, + "num_tokens": 267325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0765, + "grad_norm": 2.187875270843506, + "learning_rate": 1.605e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0533, + "grad_norm": 1.9069744348526, + "learning_rate": 1.6045000000000003e-05, + "num_tokens": 268349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0034, + "grad_norm": 0.6806110739707947, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0033, + "grad_norm": 0.6904415488243103, + "learning_rate": 1.6035e-05, + "num_tokens": 268531.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0835, + "grad_norm": 1.7817496061325073, + "learning_rate": 1.603e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.003, + "grad_norm": 0.576019823551178, + "learning_rate": 1.6025000000000003e-05, + "num_tokens": 269134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0444, + "grad_norm": 2.0043082237243652, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0456, + "grad_norm": 1.6300431489944458, + "learning_rate": 1.6015e-05, + "num_tokens": 270158.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.002, + "grad_norm": 0.3286590874195099, + "learning_rate": 1.601e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0593, + "grad_norm": 3.0931613445281982, + "learning_rate": 1.6005e-05, + "num_tokens": 270761.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0774, + "grad_norm": 2.7380502223968506, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0025, + "grad_norm": 0.5391877293586731, + "learning_rate": 1.5995000000000002e-05, + "num_tokens": 271364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0022, + "grad_norm": 0.43329155445098877, + "learning_rate": 1.599e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0753, + "grad_norm": 2.46846866607666, + "learning_rate": 1.5985e-05, + "num_tokens": 271967.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0021, + "grad_norm": 0.3546755313873291, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0023, + "grad_norm": 0.4083067774772644, + "learning_rate": 1.5975000000000002e-05, + "num_tokens": 272149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.002, + "grad_norm": 0.3581921458244324, + "learning_rate": 1.597e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0613, + "grad_norm": 2.8087387084960938, + "learning_rate": 1.5965e-05, + "num_tokens": 272752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0015, + "grad_norm": 0.1888950765132904, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0671, + "grad_norm": 2.2728195190429688, + "learning_rate": 1.5955e-05, + "num_tokens": 273355.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0829, + "grad_norm": 2.8371574878692627, + "learning_rate": 1.595e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0013, + "grad_norm": 0.12679244577884674, + "learning_rate": 1.5945e-05, + "num_tokens": 273958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0014, + "grad_norm": 0.14318323135375977, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0013, + "grad_norm": 0.12078670412302017, + "learning_rate": 1.5935e-05, + "num_tokens": 274140.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0605, + "grad_norm": 2.762150764465332, + "learning_rate": 1.593e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0013, + "grad_norm": 0.1383422166109085, + "learning_rate": 1.5925e-05, + "num_tokens": 274743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0012, + "grad_norm": 0.1123310998082161, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0802, + "grad_norm": 2.965071201324463, + "learning_rate": 1.5915e-05, + "num_tokens": 275346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.1343, + "grad_norm": 3.2984137535095215, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0829, + "grad_norm": 1.568178415298462, + "learning_rate": 1.5905e-05, + "num_tokens": 276370.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0014, + "grad_norm": 0.21307793259620667, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 1.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0579, + "grad_norm": 2.5958898067474365, + "learning_rate": 1.5895e-05, + "num_tokens": 276973.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0013, + "grad_norm": 0.1617453545331955, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0014, + "grad_norm": 0.1798456758260727, + "learning_rate": 1.5885e-05, + "num_tokens": 277155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0016, + "grad_norm": 0.20433904230594635, + "learning_rate": 1.588e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0486, + "grad_norm": 1.5812333822250366, + "learning_rate": 1.5875e-05, + "num_tokens": 277758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.1437, + "grad_norm": 3.0360054969787598, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0804, + "grad_norm": 2.6603028774261475, + "learning_rate": 1.5865e-05, + "num_tokens": 278782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": 0.0814, + "grad_norm": 1.870706558227539, + "learning_rate": 1.586e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0718, + "grad_norm": 1.5813627243041992, + "learning_rate": 1.5855e-05, + "num_tokens": 279806.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0729, + "grad_norm": 2.107619285583496, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0683, + "grad_norm": 1.209026575088501, + "learning_rate": 1.5845e-05, + "num_tokens": 280830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.2674, + "grad_norm": 6.916773319244385, + "learning_rate": 1.584e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0581, + "grad_norm": 2.1409847736358643, + "learning_rate": 1.5835e-05, + "num_tokens": 281854.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0054, + "grad_norm": 1.191935420036316, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0057, + "grad_norm": 1.2228178977966309, + "learning_rate": 1.5825000000000002e-05, + "num_tokens": 282036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.005, + "grad_norm": 1.1271437406539917, + "learning_rate": 1.582e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0545, + "grad_norm": 2.2059969902038574, + "learning_rate": 1.5815e-05, + "num_tokens": 282639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.1348, + "grad_norm": 2.8853166103363037, + "learning_rate": 1.581e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0027, + "grad_norm": 0.5147932767868042, + "learning_rate": 1.5805000000000002e-05, + "num_tokens": 283242.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0514, + "grad_norm": 1.7287933826446533, + "learning_rate": 1.58e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0024, + "grad_norm": 0.41022399067878723, + "learning_rate": 1.5795e-05, + "num_tokens": 283845.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0021, + "grad_norm": 0.31408146023750305, + "learning_rate": 1.579e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 1.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0019, + "grad_norm": 0.3368740677833557, + "learning_rate": 1.5785000000000002e-05, + "num_tokens": 284027.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0733, + "grad_norm": 1.9898301362991333, + "learning_rate": 1.578e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.2631, + "grad_norm": 6.1759562492370605, + "learning_rate": 1.5775e-05, + "num_tokens": 285051.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0518, + "grad_norm": 1.7494398355484009, + "learning_rate": 1.577e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0634, + "grad_norm": 3.39536452293396, + "learning_rate": 1.5765000000000002e-05, + "num_tokens": 286075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0015, + "grad_norm": 0.16311416029930115, + "learning_rate": 1.576e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0014, + "grad_norm": 0.1292622685432434, + "learning_rate": 1.5755e-05, + "num_tokens": 286257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0487, + "grad_norm": 1.4789959192276, + "learning_rate": 1.575e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0704, + "grad_norm": 1.8533966541290283, + "learning_rate": 1.5745000000000003e-05, + "num_tokens": 287281.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0431, + "grad_norm": 1.6309059858322144, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.125, + "grad_norm": 1.811131238937378, + "learning_rate": 1.5735e-05, + "num_tokens": 288305.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0018, + "grad_norm": 0.2807428240776062, + "learning_rate": 1.573e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 1.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0991, + "grad_norm": 2.5759706497192383, + "learning_rate": 1.5725000000000003e-05, + "num_tokens": 288908.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0646, + "grad_norm": 2.325784206390381, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0021, + "grad_norm": 0.398372620344162, + "learning_rate": 1.5715e-05, + "num_tokens": 289511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.002, + "grad_norm": 0.34870296716690063, + "learning_rate": 1.571e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0458, + "grad_norm": 1.5269895792007446, + "learning_rate": 1.5705000000000003e-05, + "num_tokens": 290114.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0023, + "grad_norm": 0.4617532789707184, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.1164, + "grad_norm": 2.049588680267334, + "learning_rate": 1.5695e-05, + "num_tokens": 290717.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0707, + "grad_norm": 3.5546929836273193, + "learning_rate": 1.569e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0686, + "grad_norm": 1.6962814331054688, + "learning_rate": 1.5685e-05, + "num_tokens": 291741.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0509, + "grad_norm": 1.9832770824432373, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0028, + "grad_norm": 0.5347197651863098, + "learning_rate": 1.5675e-05, + "num_tokens": 292344.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0716, + "grad_norm": 2.209432363510132, + "learning_rate": 1.567e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0826, + "grad_norm": 1.7408462762832642, + "learning_rate": 1.5665e-05, + "num_tokens": 293368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0553, + "grad_norm": 1.7983943223953247, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0042, + "grad_norm": 0.8812737464904785, + "learning_rate": 1.5655000000000002e-05, + "num_tokens": 293971.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0649, + "grad_norm": 2.0859007835388184, + "learning_rate": 1.565e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0581, + "grad_norm": 1.566475510597229, + "learning_rate": 1.5645e-05, + "num_tokens": 294995.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0045, + "grad_norm": 0.9423922896385193, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0517, + "grad_norm": 1.8182531595230103, + "learning_rate": 1.5635e-05, + "num_tokens": 295598.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.1177, + "grad_norm": 2.7388081550598145, + "learning_rate": 1.563e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.1132, + "grad_norm": 2.579310655593872, + "learning_rate": 1.5625e-05, + "num_tokens": 296622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": 0.065, + "grad_norm": 1.4705184698104858, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0032, + "grad_norm": 0.6671587228775024, + "learning_rate": 1.5615000000000002e-05, + "num_tokens": 297225.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0579, + "grad_norm": 2.3290131092071533, + "learning_rate": 1.561e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0814, + "grad_norm": 2.8370614051818848, + "learning_rate": 1.5605e-05, + "num_tokens": 298249.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0689, + "grad_norm": 2.715596914291382, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0671, + "grad_norm": 1.7622898817062378, + "learning_rate": 1.5595000000000002e-05, + "num_tokens": 299273.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0042, + "grad_norm": 0.9052322506904602, + "learning_rate": 1.559e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.049, + "grad_norm": 1.3162498474121094, + "learning_rate": 1.5585e-05, + "num_tokens": 299876.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0036, + "grad_norm": 0.7319129109382629, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 1.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0032, + "grad_norm": 0.6452810764312744, + "learning_rate": 1.5575000000000002e-05, + "num_tokens": 300058.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0757, + "grad_norm": 2.2865378856658936, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0538, + "grad_norm": 1.7665457725524902, + "learning_rate": 1.5565e-05, + "num_tokens": 301082.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.047, + "grad_norm": 1.9683163166046143, + "learning_rate": 1.556e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0664, + "grad_norm": 2.087733030319214, + "learning_rate": 1.5555000000000003e-05, + "num_tokens": 302106.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0023, + "grad_norm": 0.39902573823928833, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 1.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0021, + "grad_norm": 0.34475409984588623, + "learning_rate": 1.5545e-05, + "num_tokens": 302288.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0532, + "grad_norm": 1.763016700744629, + "learning_rate": 1.554e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0625, + "grad_norm": 2.4447097778320312, + "learning_rate": 1.5535000000000003e-05, + "num_tokens": 303312.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.2444, + "grad_norm": 5.089849948883057, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.1233, + "grad_norm": 1.9174350500106812, + "learning_rate": 1.5525e-05, + "num_tokens": 304336.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.002, + "grad_norm": 0.34749460220336914, + "learning_rate": 1.552e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 1.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.075, + "grad_norm": 1.8123295307159424, + "learning_rate": 1.5515000000000003e-05, + "num_tokens": 304939.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0942, + "grad_norm": 2.2524919509887695, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0023, + "grad_norm": 0.4282050132751465, + "learning_rate": 1.5505e-05, + "num_tokens": 305542.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0022, + "grad_norm": 0.4201665222644806, + "learning_rate": 1.55e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0022, + "grad_norm": 0.38267236948013306, + "learning_rate": 1.5495000000000003e-05, + "num_tokens": 305724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0668, + "grad_norm": 1.5852563381195068, + "learning_rate": 1.549e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0686, + "grad_norm": 2.5186655521392822, + "learning_rate": 1.5485e-05, + "num_tokens": 306748.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0018, + "grad_norm": 0.3009900450706482, + "learning_rate": 1.548e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 1.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0591, + "grad_norm": 2.0340046882629395, + "learning_rate": 1.5475000000000003e-05, + "num_tokens": 307351.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0652, + "grad_norm": 2.206228017807007, + "learning_rate": 1.547e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0485, + "grad_norm": 1.763405203819275, + "learning_rate": 1.5465000000000002e-05, + "num_tokens": 308375.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.002, + "grad_norm": 0.35779571533203125, + "learning_rate": 1.546e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0019, + "grad_norm": 0.32313865423202515, + "learning_rate": 1.5455000000000004e-05, + "num_tokens": 308557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0749, + "grad_norm": 2.2083141803741455, + "learning_rate": 1.545e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0762, + "grad_norm": 1.5048847198486328, + "learning_rate": 1.5445000000000002e-05, + "num_tokens": 309581.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0536, + "grad_norm": 1.6958098411560059, + "learning_rate": 1.544e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0718, + "grad_norm": 1.9835456609725952, + "learning_rate": 1.5435000000000004e-05, + "num_tokens": 310605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0459, + "grad_norm": 1.618090033531189, + "learning_rate": 1.543e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0611, + "grad_norm": 1.508302092552185, + "learning_rate": 1.5425000000000002e-05, + "num_tokens": 311629.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.1341, + "grad_norm": 3.744704008102417, + "learning_rate": 1.542e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0649, + "grad_norm": 1.4073272943496704, + "learning_rate": 1.5415e-05, + "num_tokens": 312653.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0059, + "grad_norm": 1.3199745416641235, + "learning_rate": 1.541e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0445, + "grad_norm": 1.7224688529968262, + "learning_rate": 1.5405000000000002e-05, + "num_tokens": 313256.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0697, + "grad_norm": 1.5272228717803955, + "learning_rate": 1.54e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0061, + "grad_norm": 1.3069825172424316, + "learning_rate": 1.5395e-05, + "num_tokens": 313859.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0059, + "grad_norm": 1.285326600074768, + "learning_rate": 1.539e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0044, + "grad_norm": 0.9240864515304565, + "learning_rate": 1.5385000000000003e-05, + "num_tokens": 314041.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0535, + "grad_norm": 1.9520580768585205, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0426, + "grad_norm": 1.3014405965805054, + "learning_rate": 1.5375e-05, + "num_tokens": 315065.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0024, + "grad_norm": 0.4011932611465454, + "learning_rate": 1.537e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 1.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0019, + "grad_norm": 0.2749421298503876, + "learning_rate": 1.5365e-05, + "num_tokens": 315247.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0022, + "grad_norm": 0.31892502307891846, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 1.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0483, + "grad_norm": 2.0664267539978027, + "learning_rate": 1.5355e-05, + "num_tokens": 315850.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0819, + "grad_norm": 2.846149206161499, + "learning_rate": 1.535e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0013, + "grad_norm": 0.1373102068901062, + "learning_rate": 1.5345e-05, + "num_tokens": 316453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0013, + "grad_norm": 0.1736987680196762, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.053, + "grad_norm": 1.4268443584442139, + "learning_rate": 1.5335e-05, + "num_tokens": 317056.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0917, + "grad_norm": 1.9649128913879395, + "learning_rate": 1.533e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.1411, + "grad_norm": 2.5292632579803467, + "learning_rate": 1.5325e-05, + "num_tokens": 318080.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0527, + "grad_norm": 1.9480016231536865, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0846, + "grad_norm": 2.2493338584899902, + "learning_rate": 1.5315e-05, + "num_tokens": 319104.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0013, + "grad_norm": 0.13474015891551971, + "learning_rate": 1.531e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 1.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0832, + "grad_norm": 1.5178154706954956, + "learning_rate": 1.5305e-05, + "num_tokens": 319707.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": 0.0795, + "grad_norm": 2.071016788482666, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.1163, + "grad_norm": 2.11936092376709, + "learning_rate": 1.5295000000000002e-05, + "num_tokens": 320731.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0018, + "grad_norm": 0.2738206088542938, + "learning_rate": 1.529e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 1.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0674, + "grad_norm": 1.7774465084075928, + "learning_rate": 1.5285e-05, + "num_tokens": 321334.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0019, + "grad_norm": 0.3061210513114929, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.1228, + "grad_norm": 2.0818684101104736, + "learning_rate": 1.5275000000000002e-05, + "num_tokens": 321937.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0716, + "grad_norm": 1.6649255752563477, + "learning_rate": 1.527e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0026, + "grad_norm": 0.477672815322876, + "learning_rate": 1.5265e-05, + "num_tokens": 322540.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0728, + "grad_norm": 1.9350183010101318, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0631, + "grad_norm": 1.786603569984436, + "learning_rate": 1.5255000000000002e-05, + "num_tokens": 323564.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.1006, + "grad_norm": 2.4447789192199707, + "learning_rate": 1.525e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0034, + "grad_norm": 0.6078147292137146, + "learning_rate": 1.5245e-05, + "num_tokens": 324167.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0769, + "grad_norm": 1.76687753200531, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.1099, + "grad_norm": 1.7330924272537231, + "learning_rate": 1.5235000000000002e-05, + "num_tokens": 325191.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.1119, + "grad_norm": 2.317302942276001, + "learning_rate": 1.523e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0047, + "grad_norm": 0.8692587018013, + "learning_rate": 1.5225e-05, + "num_tokens": 325794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0752, + "grad_norm": 2.7787444591522217, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0042, + "grad_norm": 0.7904698252677917, + "learning_rate": 1.5215000000000003e-05, + "num_tokens": 326397.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0637, + "grad_norm": 1.9206311702728271, + "learning_rate": 1.521e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0641, + "grad_norm": 1.5487322807312012, + "learning_rate": 1.5205000000000001e-05, + "num_tokens": 327421.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0034, + "grad_norm": 0.6128824949264526, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0026, + "grad_norm": 0.4303649365901947, + "learning_rate": 1.5195000000000003e-05, + "num_tokens": 327603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0024, + "grad_norm": 0.3603818118572235, + "learning_rate": 1.519e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 1.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0722, + "grad_norm": 1.3239399194717407, + "learning_rate": 1.5185000000000001e-05, + "num_tokens": 328206.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0714, + "grad_norm": 1.5037869215011597, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0462, + "grad_norm": 1.4942961931228638, + "learning_rate": 1.5175000000000001e-05, + "num_tokens": 329230.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0019, + "grad_norm": 0.2582552134990692, + "learning_rate": 1.517e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0018, + "grad_norm": 0.22304527461528778, + "learning_rate": 1.5165000000000001e-05, + "num_tokens": 329412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.083, + "grad_norm": 2.117966890335083, + "learning_rate": 1.516e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0018, + "grad_norm": 0.21721050143241882, + "learning_rate": 1.5155000000000001e-05, + "num_tokens": 330015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0016, + "grad_norm": 0.20195893943309784, + "learning_rate": 1.515e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0807, + "grad_norm": 2.2437827587127686, + "learning_rate": 1.5145000000000002e-05, + "num_tokens": 330618.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0803, + "grad_norm": 2.0074269771575928, + "learning_rate": 1.514e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.1081, + "grad_norm": 2.117880344390869, + "learning_rate": 1.5135000000000002e-05, + "num_tokens": 331642.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0498, + "grad_norm": 1.624760389328003, + "learning_rate": 1.513e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0017, + "grad_norm": 0.2406463772058487, + "learning_rate": 1.5125e-05, + "num_tokens": 332245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.078, + "grad_norm": 1.9976122379302979, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0017, + "grad_norm": 0.2691337466239929, + "learning_rate": 1.5115000000000002e-05, + "num_tokens": 332848.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0017, + "grad_norm": 0.3240523040294647, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.002, + "grad_norm": 0.3948870897293091, + "learning_rate": 1.5105e-05, + "num_tokens": 333030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.058, + "grad_norm": 2.228799343109131, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0019, + "grad_norm": 0.30388572812080383, + "learning_rate": 1.5095000000000002e-05, + "num_tokens": 333633.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0018, + "grad_norm": 0.23492957651615143, + "learning_rate": 1.509e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0767, + "grad_norm": 1.961020588874817, + "learning_rate": 1.5085e-05, + "num_tokens": 334236.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0015, + "grad_norm": 0.18129733204841614, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0016, + "grad_norm": 0.20082105696201324, + "learning_rate": 1.5075000000000002e-05, + "num_tokens": 334418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0715, + "grad_norm": 1.6847742795944214, + "learning_rate": 1.507e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.1066, + "grad_norm": 1.804700255393982, + "learning_rate": 1.5065e-05, + "num_tokens": 335442.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0017, + "grad_norm": 0.24969542026519775, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 1.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.076, + "grad_norm": 1.119564175605774, + "learning_rate": 1.5055000000000002e-05, + "num_tokens": 336045.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.1127, + "grad_norm": 1.9994937181472778, + "learning_rate": 1.505e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0018, + "grad_norm": 0.27987295389175415, + "learning_rate": 1.5045e-05, + "num_tokens": 336648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0019, + "grad_norm": 0.3454192876815796, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0023, + "grad_norm": 0.4122897684574127, + "learning_rate": 1.5035000000000003e-05, + "num_tokens": 336830.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": 0.1004, + "grad_norm": 1.930411696434021, + "learning_rate": 1.503e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0019, + "grad_norm": 0.29886701703071594, + "learning_rate": 1.5025000000000001e-05, + "num_tokens": 337433.0, + "mean_token_accuracy": 1.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0016, + "grad_norm": 0.2443024218082428, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0673, + "grad_norm": 1.4124706983566284, + "learning_rate": 1.5015000000000001e-05, + "num_tokens": 338036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0799, + "grad_norm": 2.3533709049224854, + "learning_rate": 1.501e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0713, + "grad_norm": 1.8907470703125, + "learning_rate": 1.5005000000000001e-05, + "num_tokens": 339060.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0689, + "grad_norm": 2.691020965576172, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0497, + "grad_norm": 1.6671160459518433, + "learning_rate": 1.4995000000000001e-05, + "num_tokens": 340084.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.002, + "grad_norm": 0.29797157645225525, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 1.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0019, + "grad_norm": 0.29996100068092346, + "learning_rate": 1.4985000000000001e-05, + "num_tokens": 340266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0024, + "grad_norm": 0.4070133566856384, + "learning_rate": 1.498e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0021, + "grad_norm": 0.3220314681529999, + "learning_rate": 1.4975000000000001e-05, + "num_tokens": 340448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0019, + "grad_norm": 0.3058181405067444, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0018, + "grad_norm": 0.28231292963027954, + "learning_rate": 1.4965e-05, + "num_tokens": 340630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0692, + "grad_norm": 1.5155085325241089, + "learning_rate": 1.496e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0683, + "grad_norm": 1.8045986890792847, + "learning_rate": 1.4955000000000002e-05, + "num_tokens": 341654.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0408, + "grad_norm": 1.349377989768982, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0734, + "grad_norm": 1.7803888320922852, + "learning_rate": 1.4945e-05, + "num_tokens": 342678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0014, + "grad_norm": 0.1658269613981247, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 1.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0625, + "grad_norm": 1.7009806632995605, + "learning_rate": 1.4935000000000002e-05, + "num_tokens": 343281.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0017, + "grad_norm": 0.25617343187332153, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 1.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0625, + "grad_norm": 1.769629955291748, + "learning_rate": 1.4925e-05, + "num_tokens": 343884.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0017, + "grad_norm": 0.2548482418060303, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 1.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0016, + "grad_norm": 0.2222324013710022, + "learning_rate": 1.4915000000000002e-05, + "num_tokens": 344066.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0774, + "grad_norm": 4.686360836029053, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0459, + "grad_norm": 2.749084234237671, + "learning_rate": 1.4905e-05, + "num_tokens": 345090.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.1302, + "grad_norm": 4.177389621734619, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.1173, + "grad_norm": 4.055930137634277, + "learning_rate": 1.4895000000000002e-05, + "num_tokens": 346114.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.002, + "grad_norm": 0.3603017032146454, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 1.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": 0.0693, + "grad_norm": 1.6064629554748535, + "learning_rate": 1.4885e-05, + "num_tokens": 346717.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0652, + "grad_norm": 1.3037128448486328, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0689, + "grad_norm": 2.06034779548645, + "learning_rate": 1.4875000000000002e-05, + "num_tokens": 347741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0029, + "grad_norm": 0.5724895596504211, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 1.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0033, + "grad_norm": 0.6629590392112732, + "learning_rate": 1.4865e-05, + "num_tokens": 347923.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0024, + "grad_norm": 0.453980416059494, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 1.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0024, + "grad_norm": 0.4251463711261749, + "learning_rate": 1.4855000000000001e-05, + "num_tokens": 348105.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0019, + "grad_norm": 0.30966171622276306, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 1.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.002, + "grad_norm": 0.3118286430835724, + "learning_rate": 1.4845000000000001e-05, + "num_tokens": 348287.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0555, + "grad_norm": 1.792464256286621, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0675, + "grad_norm": 1.5182185173034668, + "learning_rate": 1.4835000000000001e-05, + "num_tokens": 349311.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0674, + "grad_norm": 2.3636367321014404, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0656, + "grad_norm": 2.3102426528930664, + "learning_rate": 1.4825000000000001e-05, + "num_tokens": 350335.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0649, + "grad_norm": 1.6550447940826416, + "learning_rate": 1.482e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0633, + "grad_norm": 1.6831378936767578, + "learning_rate": 1.4815000000000001e-05, + "num_tokens": 351359.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0012, + "grad_norm": 0.14287354052066803, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 1.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0506, + "grad_norm": 1.8767977952957153, + "learning_rate": 1.4805e-05, + "num_tokens": 351962.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0798, + "grad_norm": 1.768181562423706, + "learning_rate": 1.48e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0537, + "grad_norm": 1.7165502309799194, + "learning_rate": 1.4795000000000001e-05, + "num_tokens": 352986.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0016, + "grad_norm": 0.24984677135944366, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.06, + "grad_norm": 1.5225651264190674, + "learning_rate": 1.4785e-05, + "num_tokens": 353589.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0453, + "grad_norm": 1.48419988155365, + "learning_rate": 1.478e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": 0.0693, + "grad_norm": 1.9988808631896973, + "learning_rate": 1.4775000000000002e-05, + "num_tokens": 354613.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0419, + "grad_norm": 1.4052188396453857, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0582, + "grad_norm": 1.6217740774154663, + "learning_rate": 1.4765e-05, + "num_tokens": 355637.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0778, + "grad_norm": 1.9261959791183472, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0593, + "grad_norm": 1.315152645111084, + "learning_rate": 1.4755000000000002e-05, + "num_tokens": 356661.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0069, + "grad_norm": 1.2978978157043457, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0412, + "grad_norm": 1.215545654296875, + "learning_rate": 1.4745e-05, + "num_tokens": 357264.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0075, + "grad_norm": 1.4120475053787231, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 1.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": 0.033, + "grad_norm": 1.2826626300811768, + "learning_rate": 1.4735000000000002e-05, + "num_tokens": 357867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0074, + "grad_norm": 1.4002093076705933, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0695, + "grad_norm": 2.1978306770324707, + "learning_rate": 1.4725e-05, + "num_tokens": 358470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0693, + "grad_norm": 1.8518682718276978, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0048, + "grad_norm": 0.920648455619812, + "learning_rate": 1.4715000000000002e-05, + "num_tokens": 359073.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0041, + "grad_norm": 0.7800686955451965, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0515, + "grad_norm": 2.606135606765747, + "learning_rate": 1.4705e-05, + "num_tokens": 359676.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0024, + "grad_norm": 0.40420445799827576, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 1.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0719, + "grad_norm": 1.9594024419784546, + "learning_rate": 1.4695e-05, + "num_tokens": 360279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0018, + "grad_norm": 0.245815709233284, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0787, + "grad_norm": 2.42266845703125, + "learning_rate": 1.4685000000000001e-05, + "num_tokens": 360882.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0014, + "grad_norm": 0.19625961780548096, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 1.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0014, + "grad_norm": 0.18439820408821106, + "learning_rate": 1.4675000000000001e-05, + "num_tokens": 361064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0012, + "grad_norm": 0.15009146928787231, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0961, + "grad_norm": 1.6586538553237915, + "learning_rate": 1.4665000000000001e-05, + "num_tokens": 361667.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.056, + "grad_norm": 1.6204346418380737, + "learning_rate": 1.466e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0629, + "grad_norm": 3.179530382156372, + "learning_rate": 1.4655000000000001e-05, + "num_tokens": 362691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0475, + "grad_norm": 1.5324857234954834, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0475, + "grad_norm": 1.6246694326400757, + "learning_rate": 1.4645e-05, + "num_tokens": 363715.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.1217, + "grad_norm": 3.528550624847412, + "learning_rate": 1.464e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0013, + "grad_norm": 0.17739705741405487, + "learning_rate": 1.4635000000000001e-05, + "num_tokens": 364318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0736, + "grad_norm": 1.7169992923736572, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.1137, + "grad_norm": 2.5113534927368164, + "learning_rate": 1.4625e-05, + "num_tokens": 365342.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.105, + "grad_norm": 2.1154234409332275, + "learning_rate": 1.462e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0014, + "grad_norm": 0.19033615291118622, + "learning_rate": 1.4615000000000002e-05, + "num_tokens": 365945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0521, + "grad_norm": 1.7730141878128052, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0016, + "grad_norm": 0.24216671288013458, + "learning_rate": 1.4605e-05, + "num_tokens": 366548.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0018, + "grad_norm": 0.27462536096572876, + "learning_rate": 1.46e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": 0.0744, + "grad_norm": 1.9374821186065674, + "learning_rate": 1.4595000000000002e-05, + "num_tokens": 367151.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0679, + "grad_norm": 1.6294903755187988, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0495, + "grad_norm": 1.4929898977279663, + "learning_rate": 1.4585e-05, + "num_tokens": 368175.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0026, + "grad_norm": 0.4472891092300415, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 1.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0023, + "grad_norm": 0.36597439646720886, + "learning_rate": 1.4575000000000002e-05, + "num_tokens": 368357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0024, + "grad_norm": 0.42359644174575806, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0022, + "grad_norm": 0.37764036655426025, + "learning_rate": 1.4565e-05, + "num_tokens": 368539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0021, + "grad_norm": 0.34881848096847534, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0019, + "grad_norm": 0.2842845320701599, + "learning_rate": 1.4555000000000002e-05, + "num_tokens": 368721.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0016, + "grad_norm": 0.23593850433826447, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0773, + "grad_norm": 1.4594675302505493, + "learning_rate": 1.4545e-05, + "num_tokens": 369324.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.1, + "grad_norm": 1.863494873046875, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0013, + "grad_norm": 0.13081954419612885, + "learning_rate": 1.4535e-05, + "num_tokens": 369927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0443, + "grad_norm": 1.7305635213851929, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0012, + "grad_norm": 0.12010564655065536, + "learning_rate": 1.4525e-05, + "num_tokens": 370530.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.046, + "grad_norm": 1.4965153932571411, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0013, + "grad_norm": 0.1335715800523758, + "learning_rate": 1.4515e-05, + "num_tokens": 371133.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0789, + "grad_norm": 2.0868091583251953, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0013, + "grad_norm": 0.1260039061307907, + "learning_rate": 1.4505000000000001e-05, + "num_tokens": 371736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0013, + "grad_norm": 0.1729843020439148, + "learning_rate": 1.45e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0014, + "grad_norm": 0.1744985431432724, + "learning_rate": 1.4495000000000001e-05, + "num_tokens": 371918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0012, + "grad_norm": 0.12203537672758102, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.1175, + "grad_norm": 2.857239007949829, + "learning_rate": 1.4485e-05, + "num_tokens": 372521.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0012, + "grad_norm": 0.13221806287765503, + "learning_rate": 1.448e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0558, + "grad_norm": 1.8117022514343262, + "learning_rate": 1.4475000000000001e-05, + "num_tokens": 373124.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0746, + "grad_norm": 1.5601890087127686, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0667, + "grad_norm": 2.6270835399627686, + "learning_rate": 1.4465e-05, + "num_tokens": 374148.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.07, + "grad_norm": 2.4209983348846436, + "learning_rate": 1.446e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0759, + "grad_norm": 1.9546290636062622, + "learning_rate": 1.4455000000000001e-05, + "num_tokens": 375172.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0673, + "grad_norm": 2.9238405227661133, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0505, + "grad_norm": 1.4308744668960571, + "learning_rate": 1.4445e-05, + "num_tokens": 376196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0491, + "grad_norm": 1.8547859191894531, + "learning_rate": 1.444e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0611, + "grad_norm": 1.7769485712051392, + "learning_rate": 1.4435000000000002e-05, + "num_tokens": 377220.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0026, + "grad_norm": 0.4414771497249603, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 1.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0645, + "grad_norm": 2.1288139820098877, + "learning_rate": 1.4425e-05, + "num_tokens": 377823.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0449, + "grad_norm": 1.480977177619934, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0622, + "grad_norm": 1.4551938772201538, + "learning_rate": 1.4415000000000002e-05, + "num_tokens": 378847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0435, + "grad_norm": 1.613083004951477, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0666, + "grad_norm": 1.3638219833374023, + "learning_rate": 1.4405e-05, + "num_tokens": 379871.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0447, + "grad_norm": 1.5498117208480835, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0423, + "grad_norm": 1.8802024126052856, + "learning_rate": 1.4395000000000002e-05, + "num_tokens": 380895.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0077, + "grad_norm": 1.3431289196014404, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0075, + "grad_norm": 1.2728586196899414, + "learning_rate": 1.4385e-05, + "num_tokens": 381077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0072, + "grad_norm": 1.205004096031189, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 1.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0776, + "grad_norm": 1.9510324001312256, + "learning_rate": 1.4375e-05, + "num_tokens": 381680.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0585, + "grad_norm": 1.6569032669067383, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0437, + "grad_norm": 1.996708631515503, + "learning_rate": 1.4365000000000002e-05, + "num_tokens": 382704.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.1022, + "grad_norm": 1.9323452711105347, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.1023, + "grad_norm": 2.318890333175659, + "learning_rate": 1.4355e-05, + "num_tokens": 383728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0406, + "grad_norm": 1.4253126382827759, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0032, + "grad_norm": 0.5123540759086609, + "learning_rate": 1.4345000000000002e-05, + "num_tokens": 384331.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0466, + "grad_norm": 1.6153643131256104, + "learning_rate": 1.434e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.003, + "grad_norm": 0.468280553817749, + "learning_rate": 1.4335e-05, + "num_tokens": 384934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0028, + "grad_norm": 0.4284001588821411, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0484, + "grad_norm": 1.9119105339050293, + "learning_rate": 1.4325000000000003e-05, + "num_tokens": 385537.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0616, + "grad_norm": 2.9587130546569824, + "learning_rate": 1.432e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0645, + "grad_norm": 2.1663818359375, + "learning_rate": 1.4315000000000001e-05, + "num_tokens": 386561.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0022, + "grad_norm": 0.33302196860313416, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0019, + "grad_norm": 0.2560519278049469, + "learning_rate": 1.4305000000000003e-05, + "num_tokens": 386743.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0504, + "grad_norm": 2.333263397216797, + "learning_rate": 1.43e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0514, + "grad_norm": 1.790854573249817, + "learning_rate": 1.4295000000000001e-05, + "num_tokens": 387767.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0478, + "grad_norm": 1.8263012170791626, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0017, + "grad_norm": 0.22925561666488647, + "learning_rate": 1.4285000000000003e-05, + "num_tokens": 388370.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0508, + "grad_norm": 1.9549782276153564, + "learning_rate": 1.428e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0461, + "grad_norm": 2.7456071376800537, + "learning_rate": 1.4275000000000001e-05, + "num_tokens": 389394.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0019, + "grad_norm": 0.25512465834617615, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0018, + "grad_norm": 0.2454918771982193, + "learning_rate": 1.4265000000000001e-05, + "num_tokens": 389576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0016, + "grad_norm": 0.20499202609062195, + "learning_rate": 1.426e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0016, + "grad_norm": 0.22024467587471008, + "learning_rate": 1.4255000000000002e-05, + "num_tokens": 389758.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.1054, + "grad_norm": 1.7958146333694458, + "learning_rate": 1.425e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0016, + "grad_norm": 0.19123780727386475, + "learning_rate": 1.4245000000000002e-05, + "num_tokens": 390361.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0015, + "grad_norm": 0.1973554641008377, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0515, + "grad_norm": 1.5054925680160522, + "learning_rate": 1.4235000000000002e-05, + "num_tokens": 390964.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0645, + "grad_norm": 1.4418784379959106, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0428, + "grad_norm": 1.3686002492904663, + "learning_rate": 1.4225000000000002e-05, + "num_tokens": 391988.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0015, + "grad_norm": 0.18040749430656433, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 1.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0666, + "grad_norm": 1.9525736570358276, + "learning_rate": 1.4215e-05, + "num_tokens": 392591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0416, + "grad_norm": 1.5055146217346191, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0016, + "grad_norm": 0.21493053436279297, + "learning_rate": 1.4205000000000002e-05, + "num_tokens": 393194.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0483, + "grad_norm": 1.4553972482681274, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0017, + "grad_norm": 0.24199633300304413, + "learning_rate": 1.4195e-05, + "num_tokens": 393797.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0017, + "grad_norm": 0.22347070276737213, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0446, + "grad_norm": 1.314347743988037, + "learning_rate": 1.4185000000000002e-05, + "num_tokens": 394400.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.002, + "grad_norm": 0.3113741874694824, + "learning_rate": 1.418e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 1.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0634, + "grad_norm": 1.786219596862793, + "learning_rate": 1.4175e-05, + "num_tokens": 395003.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0906, + "grad_norm": 2.9753689765930176, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0019, + "grad_norm": 0.2806491255760193, + "learning_rate": 1.4165000000000002e-05, + "num_tokens": 395606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0444, + "grad_norm": 1.8984386920928955, + "learning_rate": 1.416e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0023, + "grad_norm": 0.3554719090461731, + "learning_rate": 1.4155000000000001e-05, + "num_tokens": 396209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0021, + "grad_norm": 0.3154850900173187, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.002, + "grad_norm": 0.2822473347187042, + "learning_rate": 1.4145000000000003e-05, + "num_tokens": 396391.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0933, + "grad_norm": 2.0030465126037598, + "learning_rate": 1.414e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0018, + "grad_norm": 0.25846239924430847, + "learning_rate": 1.4135000000000001e-05, + "num_tokens": 396994.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0576, + "grad_norm": 1.3536447286605835, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0018, + "grad_norm": 0.23509684205055237, + "learning_rate": 1.4125000000000003e-05, + "num_tokens": 397597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0402, + "grad_norm": 1.1482503414154053, + "learning_rate": 1.412e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.065, + "grad_norm": 1.7037919759750366, + "learning_rate": 1.4115000000000001e-05, + "num_tokens": 398621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0691, + "grad_norm": 1.7646807432174683, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0804, + "grad_norm": 1.7181248664855957, + "learning_rate": 1.4105000000000001e-05, + "num_tokens": 399645.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0019, + "grad_norm": 0.2505536675453186, + "learning_rate": 1.41e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 1.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0618, + "grad_norm": 1.5859951972961426, + "learning_rate": 1.4095000000000001e-05, + "num_tokens": 400248.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0019, + "grad_norm": 0.2755191922187805, + "learning_rate": 1.409e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 1.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0555, + "grad_norm": 1.4727070331573486, + "learning_rate": 1.4085000000000002e-05, + "num_tokens": 400851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0481, + "grad_norm": 1.8706026077270508, + "learning_rate": 1.408e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0474, + "grad_norm": 1.1995218992233276, + "learning_rate": 1.4075000000000002e-05, + "num_tokens": 401875.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0543, + "grad_norm": 1.2178373336791992, + "learning_rate": 1.407e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0562, + "grad_norm": 1.595617413520813, + "learning_rate": 1.4065000000000002e-05, + "num_tokens": 402899.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0029, + "grad_norm": 0.46309027075767517, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 1.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0031, + "grad_norm": 0.5019537210464478, + "learning_rate": 1.4055e-05, + "num_tokens": 403081.0, + "mean_token_accuracy": 1.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0481, + "grad_norm": 1.4502179622650146, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0612, + "grad_norm": 1.3172924518585205, + "learning_rate": 1.4045000000000002e-05, + "num_tokens": 404105.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0643, + "grad_norm": 1.8145051002502441, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0939, + "grad_norm": 2.2837142944335938, + "learning_rate": 1.4035e-05, + "num_tokens": 405129.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0444, + "grad_norm": 1.4133625030517578, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0762, + "grad_norm": 3.3270263671875, + "learning_rate": 1.4025000000000002e-05, + "num_tokens": 406153.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0382, + "grad_norm": 1.5502580404281616, + "learning_rate": 1.402e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0624, + "grad_norm": 2.8620283603668213, + "learning_rate": 1.4015e-05, + "num_tokens": 407177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0062, + "grad_norm": 0.9600316286087036, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": 0.232, + "grad_norm": 6.662532329559326, + "learning_rate": 1.4005000000000002e-05, + "num_tokens": 407780.0, + "mean_token_accuracy": 0.9373776912689209, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.2308, + "grad_norm": 5.728747844696045, + "learning_rate": 1.4e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0064, + "grad_norm": 1.0067918300628662, + "learning_rate": 1.3995e-05, + "num_tokens": 408383.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0717, + "grad_norm": 2.222224712371826, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0049, + "grad_norm": 0.7748068571090698, + "learning_rate": 1.3985000000000002e-05, + "num_tokens": 408986.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0042, + "grad_norm": 0.6555838584899902, + "learning_rate": 1.398e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 1.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.1053, + "grad_norm": 2.1453135013580322, + "learning_rate": 1.3975000000000001e-05, + "num_tokens": 409589.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0814, + "grad_norm": 2.092453718185425, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0025, + "grad_norm": 0.37734025716781616, + "learning_rate": 1.3965000000000003e-05, + "num_tokens": 410192.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0859, + "grad_norm": 2.4313082695007324, + "learning_rate": 1.396e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0571, + "grad_norm": 1.533075213432312, + "learning_rate": 1.3955000000000001e-05, + "num_tokens": 411216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0907, + "grad_norm": 1.7440866231918335, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0015, + "grad_norm": 0.19383682310581207, + "learning_rate": 1.3945000000000001e-05, + "num_tokens": 411819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0015, + "grad_norm": 0.1786634922027588, + "learning_rate": 1.394e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.065, + "grad_norm": 2.1025426387786865, + "learning_rate": 1.3935000000000001e-05, + "num_tokens": 412422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0879, + "grad_norm": 1.9717315435409546, + "learning_rate": 1.393e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0726, + "grad_norm": 2.1733202934265137, + "learning_rate": 1.3925000000000001e-05, + "num_tokens": 413446.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": 0.0635, + "grad_norm": 2.1671876907348633, + "learning_rate": 1.392e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0728, + "grad_norm": 1.5356316566467285, + "learning_rate": 1.3915000000000001e-05, + "num_tokens": 414470.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0014, + "grad_norm": 0.16603456437587738, + "learning_rate": 1.391e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0558, + "grad_norm": 1.9890317916870117, + "learning_rate": 1.3905000000000002e-05, + "num_tokens": 415073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0015, + "grad_norm": 0.20005646347999573, + "learning_rate": 1.39e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.1005, + "grad_norm": 3.5178253650665283, + "learning_rate": 1.3895e-05, + "num_tokens": 415676.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0702, + "grad_norm": 2.5081353187561035, + "learning_rate": 1.389e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0017, + "grad_norm": 0.23757857084274292, + "learning_rate": 1.3885000000000002e-05, + "num_tokens": 416279.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0531, + "grad_norm": 1.5659825801849365, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.002, + "grad_norm": 0.3491363525390625, + "learning_rate": 1.3875e-05, + "num_tokens": 416882.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0663, + "grad_norm": 1.5751999616622925, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0018, + "grad_norm": 0.3209178149700165, + "learning_rate": 1.3865000000000002e-05, + "num_tokens": 417485.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0019, + "grad_norm": 0.3630707561969757, + "learning_rate": 1.386e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0437, + "grad_norm": 1.6397857666015625, + "learning_rate": 1.3855e-05, + "num_tokens": 418088.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0585, + "grad_norm": 2.164947748184204, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0754, + "grad_norm": 1.7066527605056763, + "learning_rate": 1.3845000000000002e-05, + "num_tokens": 419112.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0021, + "grad_norm": 0.3518334627151489, + "learning_rate": 1.384e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 1.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0505, + "grad_norm": 1.5215017795562744, + "learning_rate": 1.3835e-05, + "num_tokens": 419715.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0572, + "grad_norm": 1.9514737129211426, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0023, + "grad_norm": 0.4249929189682007, + "learning_rate": 1.3825000000000002e-05, + "num_tokens": 420318.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0661, + "grad_norm": 1.7851744890213013, + "learning_rate": 1.382e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": 0.0621, + "grad_norm": 1.3740767240524292, + "learning_rate": 1.3815e-05, + "num_tokens": 421342.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0841, + "grad_norm": 2.665015459060669, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0028, + "grad_norm": 0.4941730797290802, + "learning_rate": 1.3805000000000003e-05, + "num_tokens": 421945.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.044, + "grad_norm": 1.4924557209014893, + "learning_rate": 1.38e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 2.1234307289123535, + "learning_rate": 1.3795000000000001e-05, + "num_tokens": 422969.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0426, + "grad_norm": 1.1785792112350464, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0773, + "grad_norm": 1.6448895931243896, + "learning_rate": 1.3785000000000001e-05, + "num_tokens": 423993.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0594, + "grad_norm": 1.792230486869812, + "learning_rate": 1.378e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0535, + "grad_norm": 1.3552350997924805, + "learning_rate": 1.3775000000000001e-05, + "num_tokens": 425017.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0388, + "grad_norm": 1.0532437562942505, + "learning_rate": 1.377e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0759, + "grad_norm": 2.1115078926086426, + "learning_rate": 1.3765000000000001e-05, + "num_tokens": 426041.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0056, + "grad_norm": 0.8818362355232239, + "learning_rate": 1.376e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 1.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0051, + "grad_norm": 0.8002524971961975, + "learning_rate": 1.3755000000000001e-05, + "num_tokens": 426223.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0619, + "grad_norm": 2.207181692123413, + "learning_rate": 1.375e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0053, + "grad_norm": 0.814557671546936, + "learning_rate": 1.3745000000000001e-05, + "num_tokens": 426826.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0621, + "grad_norm": 1.6394788026809692, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0678, + "grad_norm": 1.9382132291793823, + "learning_rate": 1.3735e-05, + "num_tokens": 427850.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0396, + "grad_norm": 1.3062744140625, + "learning_rate": 1.373e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.1056, + "grad_norm": 1.7765963077545166, + "learning_rate": 1.3725000000000002e-05, + "num_tokens": 428874.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0036, + "grad_norm": 0.5703164339065552, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 1.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.062, + "grad_norm": 1.6491400003433228, + "learning_rate": 1.3715e-05, + "num_tokens": 429477.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0414, + "grad_norm": 1.2670550346374512, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0977, + "grad_norm": 2.5612552165985107, + "learning_rate": 1.3705000000000002e-05, + "num_tokens": 430501.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.043, + "grad_norm": 1.5120333433151245, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0513, + "grad_norm": 1.3469822406768799, + "learning_rate": 1.3695e-05, + "num_tokens": 431525.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.052, + "grad_norm": 1.3584448099136353, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0043, + "grad_norm": 0.6871080994606018, + "learning_rate": 1.3685000000000002e-05, + "num_tokens": 432128.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0038, + "grad_norm": 0.6316184401512146, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 1.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0039, + "grad_norm": 0.6172608733177185, + "learning_rate": 1.3675e-05, + "num_tokens": 432310.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0034, + "grad_norm": 0.5193918943405151, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 1.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": 0.0545, + "grad_norm": 1.789426326751709, + "learning_rate": 1.3665000000000002e-05, + "num_tokens": 432913.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0681, + "grad_norm": 1.8359259366989136, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0856, + "grad_norm": 2.033186197280884, + "learning_rate": 1.3655e-05, + "num_tokens": 433937.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0019, + "grad_norm": 0.2717677354812622, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0902, + "grad_norm": 1.8082786798477173, + "learning_rate": 1.3645000000000002e-05, + "num_tokens": 434540.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0019, + "grad_norm": 0.27892598509788513, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0017, + "grad_norm": 0.21636277437210083, + "learning_rate": 1.3635e-05, + "num_tokens": 434722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0017, + "grad_norm": 0.21708306670188904, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 1.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0442, + "grad_norm": 1.8083100318908691, + "learning_rate": 1.3625e-05, + "num_tokens": 435325.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0015, + "grad_norm": 0.16797110438346863, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0013, + "grad_norm": 0.1489250212907791, + "learning_rate": 1.3615000000000001e-05, + "num_tokens": 435507.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0013, + "grad_norm": 0.14432698488235474, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0845, + "grad_norm": 1.7793538570404053, + "learning_rate": 1.3605000000000001e-05, + "num_tokens": 436110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.072, + "grad_norm": 2.0468149185180664, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0012, + "grad_norm": 0.13057845830917358, + "learning_rate": 1.3595000000000001e-05, + "num_tokens": 436713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0012, + "grad_norm": 0.1187715157866478, + "learning_rate": 1.359e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0652, + "grad_norm": 1.7846852540969849, + "learning_rate": 1.3585000000000001e-05, + "num_tokens": 437316.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.001, + "grad_norm": 0.09880056232213974, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 1.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0543, + "grad_norm": 1.7948801517486572, + "learning_rate": 1.3575e-05, + "num_tokens": 437919.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0613, + "grad_norm": 1.7139854431152344, + "learning_rate": 1.357e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0932, + "grad_norm": 2.8757143020629883, + "learning_rate": 1.3565000000000001e-05, + "num_tokens": 438943.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0542, + "grad_norm": 1.7751576900482178, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0011, + "grad_norm": 0.10208199918270111, + "learning_rate": 1.3555e-05, + "num_tokens": 439546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0441, + "grad_norm": 1.3240106105804443, + "learning_rate": 1.355e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0013, + "grad_norm": 0.14222493767738342, + "learning_rate": 1.3545000000000002e-05, + "num_tokens": 440149.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0013, + "grad_norm": 0.15622317790985107, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.049, + "grad_norm": 1.685028076171875, + "learning_rate": 1.3535e-05, + "num_tokens": 440752.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0013, + "grad_norm": 0.15723161399364471, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0014, + "grad_norm": 0.1701563447713852, + "learning_rate": 1.3525000000000002e-05, + "num_tokens": 440934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0554, + "grad_norm": 1.94820237159729, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": 0.0868, + "grad_norm": 1.4613052606582642, + "learning_rate": 1.3515e-05, + "num_tokens": 441958.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0606, + "grad_norm": 1.5318107604980469, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0695, + "grad_norm": 1.676740050315857, + "learning_rate": 1.3505000000000002e-05, + "num_tokens": 442982.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0588, + "grad_norm": 1.5801854133605957, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.002, + "grad_norm": 0.27110394835472107, + "learning_rate": 1.3495e-05, + "num_tokens": 443585.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0493, + "grad_norm": 1.5821062326431274, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0409, + "grad_norm": 1.4319894313812256, + "learning_rate": 1.3485000000000002e-05, + "num_tokens": 444609.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0451, + "grad_norm": 1.562462329864502, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0621, + "grad_norm": 1.4181314706802368, + "learning_rate": 1.3475e-05, + "num_tokens": 445633.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0031, + "grad_norm": 0.48450395464897156, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0561, + "grad_norm": 1.5698680877685547, + "learning_rate": 1.3465e-05, + "num_tokens": 446236.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0035, + "grad_norm": 0.5244553685188293, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0036, + "grad_norm": 0.534037709236145, + "learning_rate": 1.3455e-05, + "num_tokens": 446418.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0728, + "grad_norm": 2.4191722869873047, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0694, + "grad_norm": 2.0287888050079346, + "learning_rate": 1.3445000000000001e-05, + "num_tokens": 447442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.057, + "grad_norm": 1.7234476804733276, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0033, + "grad_norm": 0.48596495389938354, + "learning_rate": 1.3435000000000001e-05, + "num_tokens": 448045.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0572, + "grad_norm": 1.4727040529251099, + "learning_rate": 1.343e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0031, + "grad_norm": 0.4591142535209656, + "learning_rate": 1.3425000000000001e-05, + "num_tokens": 448648.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0578, + "grad_norm": 1.542529582977295, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0544, + "grad_norm": 1.567787766456604, + "learning_rate": 1.3415e-05, + "num_tokens": 449672.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.048, + "grad_norm": 1.4822731018066406, + "learning_rate": 1.341e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0033, + "grad_norm": 0.47298771142959595, + "learning_rate": 1.3405000000000001e-05, + "num_tokens": 450275.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0885, + "grad_norm": 2.084674119949341, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0571, + "grad_norm": 1.5821152925491333, + "learning_rate": 1.3395e-05, + "num_tokens": 451299.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.003, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.339e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0461, + "grad_norm": 1.7462387084960938, + "learning_rate": 1.3385000000000001e-05, + "num_tokens": 451902.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0557, + "grad_norm": 1.9857844114303589, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0419, + "grad_norm": 1.386896014213562, + "learning_rate": 1.3375e-05, + "num_tokens": 452926.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0457, + "grad_norm": 1.6964994668960571, + "learning_rate": 1.337e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0029, + "grad_norm": 0.42876869440078735, + "learning_rate": 1.3365000000000002e-05, + "num_tokens": 453529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.1072, + "grad_norm": 2.350618839263916, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0495, + "grad_norm": 1.449182152748108, + "learning_rate": 1.3355e-05, + "num_tokens": 454553.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0608, + "grad_norm": 2.024829149246216, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0431, + "grad_norm": 1.3092213869094849, + "learning_rate": 1.3345000000000002e-05, + "num_tokens": 455577.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0035, + "grad_norm": 0.5321254134178162, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 1.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0033, + "grad_norm": 0.4984612762928009, + "learning_rate": 1.3335e-05, + "num_tokens": 455759.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.2288, + "grad_norm": 3.947110652923584, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0031, + "grad_norm": 0.4745834767818451, + "learning_rate": 1.3325000000000002e-05, + "num_tokens": 456362.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0032, + "grad_norm": 0.5151614546775818, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 1.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0542, + "grad_norm": 1.0336432456970215, + "learning_rate": 1.3315e-05, + "num_tokens": 456965.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0562, + "grad_norm": 1.5250927209854126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0431, + "grad_norm": 1.4132592678070068, + "learning_rate": 1.3305e-05, + "num_tokens": 457989.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.074, + "grad_norm": 1.864004373550415, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0023, + "grad_norm": 0.32277822494506836, + "learning_rate": 1.3295e-05, + "num_tokens": 458592.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": 0.0656, + "grad_norm": 1.8421293497085571, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0784, + "grad_norm": 1.431746482849121, + "learning_rate": 1.3285e-05, + "num_tokens": 459616.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0859, + "grad_norm": 2.2143869400024414, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0638, + "grad_norm": 2.397982597351074, + "learning_rate": 1.3275e-05, + "num_tokens": 460640.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.072, + "grad_norm": 1.9987224340438843, + "learning_rate": 1.327e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0026, + "grad_norm": 0.3712107837200165, + "learning_rate": 1.3265000000000001e-05, + "num_tokens": 461243.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0027, + "grad_norm": 0.3893998861312866, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 1.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0024, + "grad_norm": 0.3540315330028534, + "learning_rate": 1.3255e-05, + "num_tokens": 461425.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0022, + "grad_norm": 0.3253246545791626, + "learning_rate": 1.325e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 1.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0585, + "grad_norm": 1.6001460552215576, + "learning_rate": 1.3245000000000001e-05, + "num_tokens": 462028.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0472, + "grad_norm": 1.4387136697769165, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.002, + "grad_norm": 0.2645460069179535, + "learning_rate": 1.3235e-05, + "num_tokens": 462631.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0486, + "grad_norm": 1.7650330066680908, + "learning_rate": 1.323e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0018, + "grad_norm": 0.23414187133312225, + "learning_rate": 1.3225000000000001e-05, + "num_tokens": 463234.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0019, + "grad_norm": 0.2595520317554474, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0598, + "grad_norm": 1.4952349662780762, + "learning_rate": 1.3215e-05, + "num_tokens": 463837.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0777, + "grad_norm": 1.956957221031189, + "learning_rate": 1.321e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0426, + "grad_norm": 1.263728141784668, + "learning_rate": 1.3205000000000001e-05, + "num_tokens": 464861.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0018, + "grad_norm": 0.2717933654785156, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 1.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0017, + "grad_norm": 0.24730290472507477, + "learning_rate": 1.3195e-05, + "num_tokens": 465043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0017, + "grad_norm": 0.25752246379852295, + "learning_rate": 1.319e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0594, + "grad_norm": 1.2743943929672241, + "learning_rate": 1.3185000000000002e-05, + "num_tokens": 465646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0468, + "grad_norm": 1.4228495359420776, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0015, + "grad_norm": 0.2151045948266983, + "learning_rate": 1.3175e-05, + "num_tokens": 466249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0707, + "grad_norm": 1.637633204460144, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0557, + "grad_norm": 1.91914963722229, + "learning_rate": 1.3165000000000002e-05, + "num_tokens": 467273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0017, + "grad_norm": 0.22663576900959015, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0447, + "grad_norm": 1.3842930793762207, + "learning_rate": 1.3155e-05, + "num_tokens": 467876.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0402, + "grad_norm": 1.3382936716079712, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0722, + "grad_norm": 1.7016624212265015, + "learning_rate": 1.3145e-05, + "num_tokens": 468900.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0603, + "grad_norm": 1.7416592836380005, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0545, + "grad_norm": 2.0610973834991455, + "learning_rate": 1.3135e-05, + "num_tokens": 469924.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0027, + "grad_norm": 0.42048102617263794, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 1.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0647, + "grad_norm": 1.5505709648132324, + "learning_rate": 1.3125e-05, + "num_tokens": 470527.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0899, + "grad_norm": 1.7793169021606445, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0032, + "grad_norm": 0.5216090083122253, + "learning_rate": 1.3115000000000002e-05, + "num_tokens": 471130.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0886, + "grad_norm": 1.749000906944275, + "learning_rate": 1.311e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0917, + "grad_norm": 2.4577291011810303, + "learning_rate": 1.3105e-05, + "num_tokens": 472154.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0032, + "grad_norm": 0.5224512815475464, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 1.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0637, + "grad_norm": 1.690381646156311, + "learning_rate": 1.3095000000000003e-05, + "num_tokens": 472757.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0718, + "grad_norm": 2.1140615940093994, + "learning_rate": 1.309e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0041, + "grad_norm": 0.6610037684440613, + "learning_rate": 1.3085000000000001e-05, + "num_tokens": 473360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.1995, + "grad_norm": 5.919976711273193, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0035, + "grad_norm": 0.5762227177619934, + "learning_rate": 1.3075000000000003e-05, + "num_tokens": 473963.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0035, + "grad_norm": 0.558562695980072, + "learning_rate": 1.307e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 1.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0029, + "grad_norm": 0.4903852343559265, + "learning_rate": 1.3065000000000001e-05, + "num_tokens": 474145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0024, + "grad_norm": 0.40001630783081055, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 1.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.002, + "grad_norm": 0.3093484044075012, + "learning_rate": 1.3055000000000003e-05, + "num_tokens": 474327.0, + "mean_token_accuracy": 1.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0813, + "grad_norm": 1.846347451210022, + "learning_rate": 1.305e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0466, + "grad_norm": 1.9397575855255127, + "learning_rate": 1.3045000000000001e-05, + "num_tokens": 475351.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0012, + "grad_norm": 0.1433739811182022, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 1.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0647, + "grad_norm": 1.7246447801589966, + "learning_rate": 1.3035000000000001e-05, + "num_tokens": 475954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0774, + "grad_norm": 1.6557238101959229, + "learning_rate": 1.303e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0732, + "grad_norm": 1.2370885610580444, + "learning_rate": 1.3025000000000002e-05, + "num_tokens": 476978.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0011, + "grad_norm": 0.11068759858608246, + "learning_rate": 1.302e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 1.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0921, + "grad_norm": 2.1499900817871094, + "learning_rate": 1.3015000000000002e-05, + "num_tokens": 477581.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0012, + "grad_norm": 0.12917853891849518, + "learning_rate": 1.301e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0644, + "grad_norm": 1.2409875392913818, + "learning_rate": 1.3005000000000002e-05, + "num_tokens": 478184.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0594, + "grad_norm": 1.3983649015426636, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0013, + "grad_norm": 0.17072346806526184, + "learning_rate": 1.2995000000000002e-05, + "num_tokens": 478787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0623, + "grad_norm": 1.6930880546569824, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0395, + "grad_norm": 1.0536465644836426, + "learning_rate": 1.2985e-05, + "num_tokens": 479811.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0593, + "grad_norm": 1.2563151121139526, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0455, + "grad_norm": 1.3295787572860718, + "learning_rate": 1.2975000000000002e-05, + "num_tokens": 480835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.074, + "grad_norm": 1.3767396211624146, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0456, + "grad_norm": 1.3392114639282227, + "learning_rate": 1.2965e-05, + "num_tokens": 481859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.091, + "grad_norm": 2.6617116928100586, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0783, + "grad_norm": 2.208951473236084, + "learning_rate": 1.2955000000000002e-05, + "num_tokens": 482883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0026, + "grad_norm": 0.425293892621994, + "learning_rate": 1.295e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 1.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0751, + "grad_norm": 1.7252588272094727, + "learning_rate": 1.2945e-05, + "num_tokens": 483486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0032, + "grad_norm": 0.5211181640625, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 1.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": 0.0931, + "grad_norm": 2.448201894760132, + "learning_rate": 1.2935000000000002e-05, + "num_tokens": 484089.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.059, + "grad_norm": 1.2256298065185547, + "learning_rate": 1.293e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0037, + "grad_norm": 0.5853725671768188, + "learning_rate": 1.2925e-05, + "num_tokens": 484692.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0667, + "grad_norm": 1.6646796464920044, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0034, + "grad_norm": 0.5198765993118286, + "learning_rate": 1.2915000000000003e-05, + "num_tokens": 485295.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.06, + "grad_norm": 1.8327956199645996, + "learning_rate": 1.291e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0578, + "grad_norm": 1.4550710916519165, + "learning_rate": 1.2905000000000001e-05, + "num_tokens": 486319.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0035, + "grad_norm": 0.5253085494041443, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 1.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": 0.0664, + "grad_norm": 2.0553388595581055, + "learning_rate": 1.2895000000000003e-05, + "num_tokens": 486922.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0507, + "grad_norm": 1.2666943073272705, + "learning_rate": 1.289e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0419, + "grad_norm": 1.1951980590820312, + "learning_rate": 1.2885000000000001e-05, + "num_tokens": 487946.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0521, + "grad_norm": 1.5074187517166138, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0039, + "grad_norm": 0.5865699648857117, + "learning_rate": 1.2875000000000001e-05, + "num_tokens": 488549.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0032, + "grad_norm": 0.4775572121143341, + "learning_rate": 1.287e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 1.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0861, + "grad_norm": 1.977977991104126, + "learning_rate": 1.2865000000000001e-05, + "num_tokens": 489152.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0415, + "grad_norm": 1.351745843887329, + "learning_rate": 1.286e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0027, + "grad_norm": 0.3994472920894623, + "learning_rate": 1.2855000000000001e-05, + "num_tokens": 489755.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0027, + "grad_norm": 0.40307220816612244, + "learning_rate": 1.285e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 1.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0023, + "grad_norm": 0.3672088086605072, + "learning_rate": 1.2845000000000002e-05, + "num_tokens": 489937.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0024, + "grad_norm": 0.3693186938762665, + "learning_rate": 1.284e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 1.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0022, + "grad_norm": 0.3379809856414795, + "learning_rate": 1.2835000000000002e-05, + "num_tokens": 490119.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0692, + "grad_norm": 1.80624520778656, + "learning_rate": 1.283e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0015, + "grad_norm": 0.19782321155071259, + "learning_rate": 1.2825e-05, + "num_tokens": 490722.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0765, + "grad_norm": 2.1652674674987793, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0524, + "grad_norm": 1.3651760816574097, + "learning_rate": 1.2815000000000002e-05, + "num_tokens": 491746.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0013, + "grad_norm": 0.15779025852680206, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 1.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0405, + "grad_norm": 1.4021095037460327, + "learning_rate": 1.2805e-05, + "num_tokens": 492349.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0012, + "grad_norm": 0.14934077858924866, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 1.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0466, + "grad_norm": 1.3255256414413452, + "learning_rate": 1.2795000000000002e-05, + "num_tokens": 492952.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0011, + "grad_norm": 0.13669109344482422, + "learning_rate": 1.279e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 1.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0706, + "grad_norm": 2.915336847305298, + "learning_rate": 1.2785e-05, + "num_tokens": 493555.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0012, + "grad_norm": 0.14015723764896393, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 1.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0731, + "grad_norm": 1.5240583419799805, + "learning_rate": 1.2775000000000002e-05, + "num_tokens": 494158.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0011, + "grad_norm": 0.11803555488586426, + "learning_rate": 1.277e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 1.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0011, + "grad_norm": 0.13458400964736938, + "learning_rate": 1.2765e-05, + "num_tokens": 494340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0012, + "grad_norm": 0.14607498049736023, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0011, + "grad_norm": 0.12011824548244476, + "learning_rate": 1.2755000000000002e-05, + "num_tokens": 494522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0011, + "grad_norm": 0.13116565346717834, + "learning_rate": 1.275e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0011, + "grad_norm": 0.11727877706289291, + "learning_rate": 1.2745e-05, + "num_tokens": 494704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0501, + "grad_norm": 1.6986955404281616, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0472, + "grad_norm": 1.4376126527786255, + "learning_rate": 1.2735000000000003e-05, + "num_tokens": 495728.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.001, + "grad_norm": 0.11870448291301727, + "learning_rate": 1.273e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 1.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0011, + "grad_norm": 0.11969612538814545, + "learning_rate": 1.2725000000000001e-05, + "num_tokens": 495910.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0493, + "grad_norm": 1.3840702772140503, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.001, + "grad_norm": 0.10890035331249237, + "learning_rate": 1.2715000000000001e-05, + "num_tokens": 496513.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0011, + "grad_norm": 0.12227390706539154, + "learning_rate": 1.271e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0752, + "grad_norm": 2.110506057739258, + "learning_rate": 1.2705000000000001e-05, + "num_tokens": 497116.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0011, + "grad_norm": 0.1325536072254181, + "learning_rate": 1.27e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 1.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0386, + "grad_norm": 1.118979811668396, + "learning_rate": 1.2695000000000001e-05, + "num_tokens": 497719.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.066, + "grad_norm": 1.572615623474121, + "learning_rate": 1.269e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0679, + "grad_norm": 1.6447997093200684, + "learning_rate": 1.2685000000000001e-05, + "num_tokens": 498743.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0012, + "grad_norm": 0.1418675184249878, + "learning_rate": 1.268e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0471, + "grad_norm": 1.3554447889328003, + "learning_rate": 1.2675000000000001e-05, + "num_tokens": 499346.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0012, + "grad_norm": 0.1589028388261795, + "learning_rate": 1.267e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0943, + "grad_norm": 2.5991010665893555, + "learning_rate": 1.2665e-05, + "num_tokens": 499949.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0495, + "grad_norm": 1.6441336870193481, + "learning_rate": 1.266e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0765, + "grad_norm": 1.842661738395691, + "learning_rate": 1.2655000000000002e-05, + "num_tokens": 500973.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0016, + "grad_norm": 0.22247855365276337, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 1.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0401, + "grad_norm": 1.3632177114486694, + "learning_rate": 1.2645e-05, + "num_tokens": 501576.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0021, + "grad_norm": 0.31719765067100525, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 1.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0376, + "grad_norm": 1.1765908002853394, + "learning_rate": 1.2635000000000002e-05, + "num_tokens": 502179.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0024, + "grad_norm": 0.33981993794441223, + "learning_rate": 1.263e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 1.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0712, + "grad_norm": 1.7833467721939087, + "learning_rate": 1.2625e-05, + "num_tokens": 502782.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0407, + "grad_norm": 1.2483290433883667, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0597, + "grad_norm": 1.2847890853881836, + "learning_rate": 1.2615000000000002e-05, + "num_tokens": 503806.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0706, + "grad_norm": 2.0048041343688965, + "learning_rate": 1.261e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0033, + "grad_norm": 0.48029038310050964, + "learning_rate": 1.2605e-05, + "num_tokens": 504409.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0853, + "grad_norm": 1.8489866256713867, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0664, + "grad_norm": 1.9049607515335083, + "learning_rate": 1.2595000000000002e-05, + "num_tokens": 505433.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0038, + "grad_norm": 0.5629300475120544, + "learning_rate": 1.259e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0035, + "grad_norm": 0.5016162395477295, + "learning_rate": 1.2585e-05, + "num_tokens": 505615.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0034, + "grad_norm": 0.533896803855896, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 1.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0398, + "grad_norm": 1.6724116802215576, + "learning_rate": 1.2575000000000002e-05, + "num_tokens": 506218.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0649, + "grad_norm": 1.1757819652557373, + "learning_rate": 1.257e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0028, + "grad_norm": 0.3974631726741791, + "learning_rate": 1.2565e-05, + "num_tokens": 506821.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0022, + "grad_norm": 0.33079567551612854, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 1.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0612, + "grad_norm": 1.6804654598236084, + "learning_rate": 1.2555000000000001e-05, + "num_tokens": 507424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0814, + "grad_norm": 1.6637822389602661, + "learning_rate": 1.255e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0463, + "grad_norm": 1.2395890951156616, + "learning_rate": 1.2545000000000001e-05, + "num_tokens": 508448.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0022, + "grad_norm": 0.3290168046951294, + "learning_rate": 1.254e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0474, + "grad_norm": 1.62813138961792, + "learning_rate": 1.2535000000000001e-05, + "num_tokens": 509051.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0432, + "grad_norm": 1.1684247255325317, + "learning_rate": 1.253e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.191, + "grad_norm": 4.108924865722656, + "learning_rate": 1.2525000000000001e-05, + "num_tokens": 510075.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0022, + "grad_norm": 0.32842448353767395, + "learning_rate": 1.252e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0692, + "grad_norm": 1.0593329668045044, + "learning_rate": 1.2515000000000001e-05, + "num_tokens": 510678.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.046, + "grad_norm": 1.279249906539917, + "learning_rate": 1.251e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0021, + "grad_norm": 0.32091253995895386, + "learning_rate": 1.2505e-05, + "num_tokens": 511281.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0404, + "grad_norm": 1.2973002195358276, + "learning_rate": 1.25e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0023, + "grad_norm": 0.34064143896102905, + "learning_rate": 1.2495000000000001e-05, + "num_tokens": 511884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0793, + "grad_norm": 1.864046573638916, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0023, + "grad_norm": 0.3757898211479187, + "learning_rate": 1.2485e-05, + "num_tokens": 512487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0024, + "grad_norm": 0.381061315536499, + "learning_rate": 1.248e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 1.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0676, + "grad_norm": 1.62307608127594, + "learning_rate": 1.2475000000000002e-05, + "num_tokens": 513090.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.047, + "grad_norm": 1.570786476135254, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0566, + "grad_norm": 1.7626087665557861, + "learning_rate": 1.2465e-05, + "num_tokens": 514114.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0581, + "grad_norm": 1.7678264379501343, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0417, + "grad_norm": 1.4467406272888184, + "learning_rate": 1.2455000000000002e-05, + "num_tokens": 515138.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0712, + "grad_norm": 1.5711795091629028, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0026, + "grad_norm": 0.41801631450653076, + "learning_rate": 1.2445e-05, + "num_tokens": 515741.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0499, + "grad_norm": 1.5882858037948608, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0504, + "grad_norm": 1.1772035360336304, + "learning_rate": 1.2435000000000002e-05, + "num_tokens": 516765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0618, + "grad_norm": 1.7687872648239136, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0664, + "grad_norm": 1.677937626838684, + "learning_rate": 1.2425e-05, + "num_tokens": 517789.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.004, + "grad_norm": 0.654071569442749, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 1.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0409, + "grad_norm": 1.5208879709243774, + "learning_rate": 1.2415000000000002e-05, + "num_tokens": 518392.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0035, + "grad_norm": 0.5567553639411926, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 1.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0798, + "grad_norm": 2.2302029132843018, + "learning_rate": 1.2405e-05, + "num_tokens": 518995.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0669, + "grad_norm": 2.0240256786346436, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0839, + "grad_norm": 1.8468784093856812, + "learning_rate": 1.2395e-05, + "num_tokens": 520019.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0584, + "grad_norm": 2.1111018657684326, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0043, + "grad_norm": 0.755431592464447, + "learning_rate": 1.2385000000000001e-05, + "num_tokens": 520622.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0432, + "grad_norm": 1.864660620689392, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0693, + "grad_norm": 3.3374569416046143, + "learning_rate": 1.2375000000000001e-05, + "num_tokens": 521646.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0388, + "grad_norm": 1.5575084686279297, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0645, + "grad_norm": 1.5467334985733032, + "learning_rate": 1.2365000000000001e-05, + "num_tokens": 522670.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0037, + "grad_norm": 0.5897421836853027, + "learning_rate": 1.236e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 1.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0819, + "grad_norm": 3.0543386936187744, + "learning_rate": 1.2355000000000001e-05, + "num_tokens": 523273.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.004, + "grad_norm": 0.647894024848938, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 1.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0032, + "grad_norm": 0.5120076537132263, + "learning_rate": 1.2345e-05, + "num_tokens": 523455.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0032, + "grad_norm": 0.50294429063797, + "learning_rate": 1.234e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 1.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0654, + "grad_norm": 1.3424628973007202, + "learning_rate": 1.2335000000000001e-05, + "num_tokens": 524058.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0898, + "grad_norm": 2.0473086833953857, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0023, + "grad_norm": 0.36929139494895935, + "learning_rate": 1.2325e-05, + "num_tokens": 524661.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0021, + "grad_norm": 0.3227180540561676, + "learning_rate": 1.232e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 1.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0663, + "grad_norm": 1.83015775680542, + "learning_rate": 1.2315000000000002e-05, + "num_tokens": 525264.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0657, + "grad_norm": 1.8247884511947632, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0016, + "grad_norm": 0.21814872324466705, + "learning_rate": 1.2305e-05, + "num_tokens": 525867.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.07, + "grad_norm": 1.3606796264648438, + "learning_rate": 1.23e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0521, + "grad_norm": 1.5558913946151733, + "learning_rate": 1.2295000000000002e-05, + "num_tokens": 526891.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": 0.0768, + "grad_norm": 1.718390703201294, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.2012, + "grad_norm": 3.623452663421631, + "learning_rate": 1.2285e-05, + "num_tokens": 527915.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0922, + "grad_norm": 2.289684534072876, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0665, + "grad_norm": 1.6864427328109741, + "learning_rate": 1.2275000000000002e-05, + "num_tokens": 528939.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0017, + "grad_norm": 0.2226596623659134, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 1.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.195, + "grad_norm": 3.805149555206299, + "learning_rate": 1.2265e-05, + "num_tokens": 529542.0, + "mean_token_accuracy": 0.9412915706634521, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0651, + "grad_norm": 1.3887238502502441, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0016, + "grad_norm": 0.20220878720283508, + "learning_rate": 1.2255000000000002e-05, + "num_tokens": 530145.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0456, + "grad_norm": 1.4763877391815186, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0017, + "grad_norm": 0.2297908216714859, + "learning_rate": 1.2245e-05, + "num_tokens": 530748.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0479, + "grad_norm": 1.846569538116455, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0018, + "grad_norm": 0.2527587115764618, + "learning_rate": 1.2235e-05, + "num_tokens": 531351.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0828, + "grad_norm": 1.8091585636138916, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.002, + "grad_norm": 0.29240918159484863, + "learning_rate": 1.2225e-05, + "num_tokens": 531954.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0568, + "grad_norm": 1.4905025959014893, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0022, + "grad_norm": 0.29934078454971313, + "learning_rate": 1.2215e-05, + "num_tokens": 532557.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0655, + "grad_norm": 1.620811939239502, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0699, + "grad_norm": 1.4509178400039673, + "learning_rate": 1.2205000000000001e-05, + "num_tokens": 533581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0537, + "grad_norm": 1.6190178394317627, + "learning_rate": 1.22e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.083, + "grad_norm": 2.0025248527526855, + "learning_rate": 1.2195000000000001e-05, + "num_tokens": 534605.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3503265976905823, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 1.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": 0.0699, + "grad_norm": 1.2692803144454956, + "learning_rate": 1.2185e-05, + "num_tokens": 535208.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0024, + "grad_norm": 0.3514065146446228, + "learning_rate": 1.218e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 1.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0025, + "grad_norm": 0.3770548701286316, + "learning_rate": 1.2175000000000001e-05, + "num_tokens": 535390.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0024, + "grad_norm": 0.3553021550178528, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 1.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0657, + "grad_norm": 1.3145198822021484, + "learning_rate": 1.2165e-05, + "num_tokens": 535993.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0453, + "grad_norm": 1.1688368320465088, + "learning_rate": 1.216e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.1801, + "grad_norm": 3.7217485904693604, + "learning_rate": 1.2155000000000001e-05, + "num_tokens": 537017.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0029, + "grad_norm": 0.4446180462837219, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 1.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0863, + "grad_norm": 2.0155787467956543, + "learning_rate": 1.2145e-05, + "num_tokens": 537620.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0022, + "grad_norm": 0.3482968807220459, + "learning_rate": 1.214e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0023, + "grad_norm": 0.32771721482276917, + "learning_rate": 1.2135000000000002e-05, + "num_tokens": 537802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.056, + "grad_norm": 1.8173542022705078, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.039, + "grad_norm": 1.1963605880737305, + "learning_rate": 1.2125e-05, + "num_tokens": 538826.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0594, + "grad_norm": 1.7138198614120483, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.002, + "grad_norm": 0.2943565249443054, + "learning_rate": 1.2115000000000002e-05, + "num_tokens": 539429.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.002, + "grad_norm": 0.2892753481864929, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0019, + "grad_norm": 0.2714136838912964, + "learning_rate": 1.2105e-05, + "num_tokens": 539611.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0504, + "grad_norm": 1.0601574182510376, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0019, + "grad_norm": 0.2627917230129242, + "learning_rate": 1.2095000000000002e-05, + "num_tokens": 540214.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0566, + "grad_norm": 1.1405881643295288, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0018, + "grad_norm": 0.2452574223279953, + "learning_rate": 1.2085e-05, + "num_tokens": 540817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0018, + "grad_norm": 0.24650417268276215, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0014, + "grad_norm": 0.19634543359279633, + "learning_rate": 1.2075e-05, + "num_tokens": 540999.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0014, + "grad_norm": 0.17830893397331238, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.045, + "grad_norm": 1.1427490711212158, + "learning_rate": 1.2065e-05, + "num_tokens": 541602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.043, + "grad_norm": 1.0804896354675293, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0729, + "grad_norm": 1.6100242137908936, + "learning_rate": 1.2055e-05, + "num_tokens": 542626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0585, + "grad_norm": 1.2319777011871338, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0014, + "grad_norm": 0.18333016335964203, + "learning_rate": 1.2045e-05, + "num_tokens": 543229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0014, + "grad_norm": 0.17933838069438934, + "learning_rate": 1.204e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0606, + "grad_norm": 1.531948208808899, + "learning_rate": 1.2035e-05, + "num_tokens": 543832.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0798, + "grad_norm": 1.4439104795455933, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0798, + "grad_norm": 1.6658635139465332, + "learning_rate": 1.2025e-05, + "num_tokens": 544856.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0666, + "grad_norm": 1.2919996976852417, + "learning_rate": 1.202e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0526, + "grad_norm": 1.7219940423965454, + "learning_rate": 1.2015000000000001e-05, + "num_tokens": 545880.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0453, + "grad_norm": 1.3877556324005127, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0675, + "grad_norm": 1.6357606649398804, + "learning_rate": 1.2005e-05, + "num_tokens": 546904.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0023, + "grad_norm": 0.3360651433467865, + "learning_rate": 1.2e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0025, + "grad_norm": 0.36647501587867737, + "learning_rate": 1.1995000000000001e-05, + "num_tokens": 547086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.1876, + "grad_norm": 3.880563974380493, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0026, + "grad_norm": 0.3927272856235504, + "learning_rate": 1.1985e-05, + "num_tokens": 547689.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0616, + "grad_norm": 1.807646632194519, + "learning_rate": 1.198e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0939, + "grad_norm": 3.455456018447876, + "learning_rate": 1.1975000000000001e-05, + "num_tokens": 548713.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0576, + "grad_norm": 1.2851530313491821, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0613, + "grad_norm": 1.2460367679595947, + "learning_rate": 1.1965e-05, + "num_tokens": 549737.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0498, + "grad_norm": 1.8220652341842651, + "learning_rate": 1.196e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0029, + "grad_norm": 0.43996259570121765, + "learning_rate": 1.1955000000000002e-05, + "num_tokens": 550340.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.164, + "grad_norm": 3.639434814453125, + "learning_rate": 1.195e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0033, + "grad_norm": 0.49846982955932617, + "learning_rate": 1.1945e-05, + "num_tokens": 550943.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0034, + "grad_norm": 0.5146701335906982, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0552, + "grad_norm": 0.9798343777656555, + "learning_rate": 1.1935000000000002e-05, + "num_tokens": 551546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0033, + "grad_norm": 0.49275118112564087, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 1.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0699, + "grad_norm": 1.1279994249343872, + "learning_rate": 1.1925e-05, + "num_tokens": 552149.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0029, + "grad_norm": 0.4336951673030853, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0682, + "grad_norm": 1.8408714532852173, + "learning_rate": 1.1915e-05, + "num_tokens": 552752.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0025, + "grad_norm": 0.3696609139442444, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0468, + "grad_norm": 1.6169545650482178, + "learning_rate": 1.1905e-05, + "num_tokens": 553355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0669, + "grad_norm": 1.641153335571289, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0019, + "grad_norm": 0.2700659930706024, + "learning_rate": 1.1895e-05, + "num_tokens": 553958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0021, + "grad_norm": 0.30612003803253174, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0755, + "grad_norm": 1.821285367012024, + "learning_rate": 1.1885e-05, + "num_tokens": 554561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0628, + "grad_norm": 1.6025607585906982, + "learning_rate": 1.188e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0018, + "grad_norm": 0.24747499823570251, + "learning_rate": 1.1875e-05, + "num_tokens": 555164.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0017, + "grad_norm": 0.2355332225561142, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 1.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0016, + "grad_norm": 0.22167058289051056, + "learning_rate": 1.1865000000000002e-05, + "num_tokens": 555346.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0014, + "grad_norm": 0.1909945011138916, + "learning_rate": 1.186e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0014, + "grad_norm": 0.17070873081684113, + "learning_rate": 1.1855e-05, + "num_tokens": 555528.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0714, + "grad_norm": 1.4018418788909912, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0859, + "grad_norm": 2.558520793914795, + "learning_rate": 1.1845000000000003e-05, + "num_tokens": 556552.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0012, + "grad_norm": 0.14977574348449707, + "learning_rate": 1.184e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0011, + "grad_norm": 0.12937067449092865, + "learning_rate": 1.1835000000000001e-05, + "num_tokens": 556734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0604, + "grad_norm": 1.5028055906295776, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0012, + "grad_norm": 0.13798221945762634, + "learning_rate": 1.1825000000000003e-05, + "num_tokens": 557337.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0508, + "grad_norm": 1.1325984001159668, + "learning_rate": 1.182e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0608, + "grad_norm": 1.3021001815795898, + "learning_rate": 1.1815000000000001e-05, + "num_tokens": 558361.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0563, + "grad_norm": 1.5208338499069214, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0669, + "grad_norm": 1.6899033784866333, + "learning_rate": 1.1805000000000001e-05, + "num_tokens": 559385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0582, + "grad_norm": 1.563767910003662, + "learning_rate": 1.18e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0674, + "grad_norm": 1.4604460000991821, + "learning_rate": 1.1795000000000001e-05, + "num_tokens": 560409.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.051, + "grad_norm": 1.4536890983581543, + "learning_rate": 1.179e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0681, + "grad_norm": 1.4582575559616089, + "learning_rate": 1.1785000000000002e-05, + "num_tokens": 561433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0625, + "grad_norm": 1.5202876329421997, + "learning_rate": 1.178e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0018, + "grad_norm": 0.25325441360473633, + "learning_rate": 1.1775000000000002e-05, + "num_tokens": 562036.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0533, + "grad_norm": 1.4468379020690918, + "learning_rate": 1.177e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0023, + "grad_norm": 0.32276058197021484, + "learning_rate": 1.1765000000000002e-05, + "num_tokens": 562639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0025, + "grad_norm": 0.36645182967185974, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.067, + "grad_norm": 2.532277822494507, + "learning_rate": 1.1755e-05, + "num_tokens": 563242.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0025, + "grad_norm": 0.3641115427017212, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 1.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0621, + "grad_norm": 1.6259859800338745, + "learning_rate": 1.1745000000000002e-05, + "num_tokens": 563845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0431, + "grad_norm": 1.5126338005065918, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0416, + "grad_norm": 1.3851490020751953, + "learning_rate": 1.1735e-05, + "num_tokens": 564869.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0621, + "grad_norm": 1.7890119552612305, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0661, + "grad_norm": 1.2367877960205078, + "learning_rate": 1.1725000000000002e-05, + "num_tokens": 565893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0032, + "grad_norm": 0.49922677874565125, + "learning_rate": 1.172e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 1.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0033, + "grad_norm": 0.49921202659606934, + "learning_rate": 1.1715e-05, + "num_tokens": 566075.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0035, + "grad_norm": 0.5215579867362976, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0031, + "grad_norm": 0.43590739369392395, + "learning_rate": 1.1705000000000002e-05, + "num_tokens": 566257.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0397, + "grad_norm": 1.2309280633926392, + "learning_rate": 1.17e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.04, + "grad_norm": 1.2009049654006958, + "learning_rate": 1.1695e-05, + "num_tokens": 567281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0608, + "grad_norm": 1.7890830039978027, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0022, + "grad_norm": 0.33328190445899963, + "learning_rate": 1.1685000000000002e-05, + "num_tokens": 567884.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0725, + "grad_norm": 1.7722251415252686, + "learning_rate": 1.168e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.002, + "grad_norm": 0.2905958592891693, + "learning_rate": 1.1675000000000001e-05, + "num_tokens": 568487.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0778, + "grad_norm": 1.8844209909439087, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0558, + "grad_norm": 1.4232587814331055, + "learning_rate": 1.1665000000000003e-05, + "num_tokens": 569511.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0588, + "grad_norm": 1.4562510251998901, + "learning_rate": 1.166e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0019, + "grad_norm": 0.2660907804965973, + "learning_rate": 1.1655000000000001e-05, + "num_tokens": 570114.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.079, + "grad_norm": 1.9491440057754517, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.055, + "grad_norm": 1.847509741783142, + "learning_rate": 1.1645000000000001e-05, + "num_tokens": 571138.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0517, + "grad_norm": 1.504838466644287, + "learning_rate": 1.164e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": 0.0416, + "grad_norm": 1.0979009866714478, + "learning_rate": 1.1635000000000001e-05, + "num_tokens": 572162.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0448, + "grad_norm": 1.3496202230453491, + "learning_rate": 1.163e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0026, + "grad_norm": 0.382183700799942, + "learning_rate": 1.1625000000000001e-05, + "num_tokens": 572765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0026, + "grad_norm": 0.37047019600868225, + "learning_rate": 1.162e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 1.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0702, + "grad_norm": 1.7991583347320557, + "learning_rate": 1.1615000000000001e-05, + "num_tokens": 573368.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0442, + "grad_norm": 1.4013893604278564, + "learning_rate": 1.161e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0409, + "grad_norm": 1.3295344114303589, + "learning_rate": 1.1605000000000002e-05, + "num_tokens": 574392.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0388, + "grad_norm": 1.3626537322998047, + "learning_rate": 1.16e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0031, + "grad_norm": 0.4437231123447418, + "learning_rate": 1.1595e-05, + "num_tokens": 574995.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0036, + "grad_norm": 0.5210691094398499, + "learning_rate": 1.159e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 1.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.065, + "grad_norm": 2.1340172290802, + "learning_rate": 1.1585000000000002e-05, + "num_tokens": 575598.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0647, + "grad_norm": 1.9830479621887207, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0684, + "grad_norm": 2.2673563957214355, + "learning_rate": 1.1575e-05, + "num_tokens": 576622.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0031, + "grad_norm": 0.44506582617759705, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 1.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.038, + "grad_norm": 1.131693959236145, + "learning_rate": 1.1565000000000002e-05, + "num_tokens": 577225.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0369, + "grad_norm": 1.1869642734527588, + "learning_rate": 1.156e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0031, + "grad_norm": 0.4332590401172638, + "learning_rate": 1.1555e-05, + "num_tokens": 577828.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0026, + "grad_norm": 0.359754741191864, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 1.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0028, + "grad_norm": 0.3960857689380646, + "learning_rate": 1.1545000000000002e-05, + "num_tokens": 578010.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0569, + "grad_norm": 1.7389343976974487, + "learning_rate": 1.154e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0714, + "grad_norm": 1.75542414188385, + "learning_rate": 1.1535e-05, + "num_tokens": 579034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0026, + "grad_norm": 0.3733665943145752, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 1.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151729702949524, + "learning_rate": 1.1525000000000002e-05, + "num_tokens": 579216.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0725, + "grad_norm": 2.008699417114258, + "learning_rate": 1.152e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0679, + "grad_norm": 2.3607006072998047, + "learning_rate": 1.1515e-05, + "num_tokens": 580240.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.043, + "grad_norm": 1.3802534341812134, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0018, + "grad_norm": 0.24884727597236633, + "learning_rate": 1.1505000000000003e-05, + "num_tokens": 580843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0517, + "grad_norm": 1.4253575801849365, + "learning_rate": 1.15e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0479, + "grad_norm": 1.2443790435791016, + "learning_rate": 1.1495000000000001e-05, + "num_tokens": 581867.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0017, + "grad_norm": 0.22854706645011902, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 1.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0742, + "grad_norm": 1.5941340923309326, + "learning_rate": 1.1485000000000001e-05, + "num_tokens": 582470.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.002, + "grad_norm": 0.27522599697113037, + "learning_rate": 1.148e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 1.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0019, + "grad_norm": 0.2548190653324127, + "learning_rate": 1.1475000000000001e-05, + "num_tokens": 582652.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0586, + "grad_norm": 0.9956546425819397, + "learning_rate": 1.147e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0399, + "grad_norm": 1.2318187952041626, + "learning_rate": 1.1465000000000001e-05, + "num_tokens": 583676.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.057, + "grad_norm": 1.2258297204971313, + "learning_rate": 1.146e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0593, + "grad_norm": 1.4450581073760986, + "learning_rate": 1.1455000000000001e-05, + "num_tokens": 584700.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0589, + "grad_norm": 2.703789472579956, + "learning_rate": 1.145e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0022, + "grad_norm": 0.2988422214984894, + "learning_rate": 1.1445000000000001e-05, + "num_tokens": 585303.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.002, + "grad_norm": 0.2543957829475403, + "learning_rate": 1.144e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0634, + "grad_norm": 1.5069470405578613, + "learning_rate": 1.1435e-05, + "num_tokens": 585906.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0777, + "grad_norm": 1.8321071863174438, + "learning_rate": 1.143e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0705, + "grad_norm": 1.7684837579727173, + "learning_rate": 1.1425000000000002e-05, + "num_tokens": 586930.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0646, + "grad_norm": 1.7334975004196167, + "learning_rate": 1.142e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0657, + "grad_norm": 1.7223514318466187, + "learning_rate": 1.1415e-05, + "num_tokens": 587954.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0551, + "grad_norm": 2.0270273685455322, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0642, + "grad_norm": 1.5014370679855347, + "learning_rate": 1.1405000000000002e-05, + "num_tokens": 588978.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0766, + "grad_norm": 1.7329357862472534, + "learning_rate": 1.14e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0038, + "grad_norm": 0.5561279654502869, + "learning_rate": 1.1395e-05, + "num_tokens": 589581.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0805, + "grad_norm": 2.5624947547912598, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0036, + "grad_norm": 0.5101985931396484, + "learning_rate": 1.1385000000000002e-05, + "num_tokens": 590184.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0564, + "grad_norm": 1.227173924446106, + "learning_rate": 1.138e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0036, + "grad_norm": 0.5354023575782776, + "learning_rate": 1.1375e-05, + "num_tokens": 590787.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0443, + "grad_norm": 1.4744853973388672, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0715, + "grad_norm": 1.5623061656951904, + "learning_rate": 1.1365000000000002e-05, + "num_tokens": 591811.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0529, + "grad_norm": 1.357082486152649, + "learning_rate": 1.136e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0037, + "grad_norm": 0.54876309633255, + "learning_rate": 1.1355e-05, + "num_tokens": 592414.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0635, + "grad_norm": 1.2679226398468018, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0458, + "grad_norm": 1.1748446226119995, + "learning_rate": 1.1345000000000002e-05, + "num_tokens": 593438.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0035, + "grad_norm": 0.5624827146530151, + "learning_rate": 1.134e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 1.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.003, + "grad_norm": 0.4557420015335083, + "learning_rate": 1.1335e-05, + "num_tokens": 593620.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.003, + "grad_norm": 0.46185532212257385, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 1.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0028, + "grad_norm": 0.42278051376342773, + "learning_rate": 1.1325e-05, + "num_tokens": 593802.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0453, + "grad_norm": 1.387130856513977, + "learning_rate": 1.132e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025925099849701, + "learning_rate": 1.1315000000000001e-05, + "num_tokens": 594405.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0022, + "grad_norm": 0.33897924423217773, + "learning_rate": 1.131e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 1.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0737, + "grad_norm": 1.979303240776062, + "learning_rate": 1.1305000000000001e-05, + "num_tokens": 595008.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.064, + "grad_norm": 1.5425118207931519, + "learning_rate": 1.13e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0671, + "grad_norm": 1.1620323657989502, + "learning_rate": 1.1295000000000001e-05, + "num_tokens": 596032.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0785, + "grad_norm": 2.378268003463745, + "learning_rate": 1.129e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0016, + "grad_norm": 0.22170788049697876, + "learning_rate": 1.1285000000000001e-05, + "num_tokens": 596635.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0015, + "grad_norm": 0.20151561498641968, + "learning_rate": 1.128e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0017, + "grad_norm": 0.2272740602493286, + "learning_rate": 1.1275e-05, + "num_tokens": 596817.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0013, + "grad_norm": 0.15716217458248138, + "learning_rate": 1.127e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0609, + "grad_norm": 1.5205357074737549, + "learning_rate": 1.1265000000000001e-05, + "num_tokens": 597420.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0013, + "grad_norm": 0.16709472239017487, + "learning_rate": 1.126e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 1.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0771, + "grad_norm": 1.7946810722351074, + "learning_rate": 1.1255e-05, + "num_tokens": 598023.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0581, + "grad_norm": 1.250422716140747, + "learning_rate": 1.125e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0566, + "grad_norm": 1.8859542608261108, + "learning_rate": 1.1245000000000002e-05, + "num_tokens": 599047.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.042, + "grad_norm": 1.3896710872650146, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0012, + "grad_norm": 0.13600599765777588, + "learning_rate": 1.1235e-05, + "num_tokens": 599650.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0455, + "grad_norm": 1.2671265602111816, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0792, + "grad_norm": 1.9507051706314087, + "learning_rate": 1.1225000000000002e-05, + "num_tokens": 600674.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0015, + "grad_norm": 0.18869547545909882, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 1.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0643, + "grad_norm": 2.124163866043091, + "learning_rate": 1.1215e-05, + "num_tokens": 601277.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0017, + "grad_norm": 0.22649085521697998, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 1.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0014, + "grad_norm": 0.1775384545326233, + "learning_rate": 1.1205000000000002e-05, + "num_tokens": 601459.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0675, + "grad_norm": 2.2713491916656494, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0504, + "grad_norm": 1.3982276916503906, + "learning_rate": 1.1195e-05, + "num_tokens": 602483.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0478, + "grad_norm": 1.40345299243927, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0655, + "grad_norm": 2.0257670879364014, + "learning_rate": 1.1185000000000002e-05, + "num_tokens": 603507.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0019, + "grad_norm": 0.2651630938053131, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": 0.0772, + "grad_norm": 2.0185799598693848, + "learning_rate": 1.1175e-05, + "num_tokens": 604110.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0022, + "grad_norm": 0.30773913860321045, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 1.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0401, + "grad_norm": 1.1661447286605835, + "learning_rate": 1.1165e-05, + "num_tokens": 604713.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0805, + "grad_norm": 2.5561182498931885, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0023, + "grad_norm": 0.3356492221355438, + "learning_rate": 1.1155e-05, + "num_tokens": 605316.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0519, + "grad_norm": 1.2280339002609253, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0412, + "grad_norm": 1.1461997032165527, + "learning_rate": 1.1145000000000001e-05, + "num_tokens": 606340.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0024, + "grad_norm": 0.33912718296051025, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0026, + "grad_norm": 0.3827052116394043, + "learning_rate": 1.1135000000000001e-05, + "num_tokens": 606522.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0026, + "grad_norm": 0.4025944471359253, + "learning_rate": 1.113e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0025, + "grad_norm": 0.34845641255378723, + "learning_rate": 1.1125000000000001e-05, + "num_tokens": 606704.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0704, + "grad_norm": 1.9853920936584473, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0431, + "grad_norm": 1.3894938230514526, + "learning_rate": 1.1115e-05, + "num_tokens": 607728.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.069, + "grad_norm": 1.2977555990219116, + "learning_rate": 1.111e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0366, + "grad_norm": 1.1859874725341797, + "learning_rate": 1.1105000000000001e-05, + "num_tokens": 608752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0022, + "grad_norm": 0.3078896105289459, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 1.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.002, + "grad_norm": 0.28668129444122314, + "learning_rate": 1.1095e-05, + "num_tokens": 608934.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0021, + "grad_norm": 0.30314162373542786, + "learning_rate": 1.109e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 1.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0741, + "grad_norm": 1.5230200290679932, + "learning_rate": 1.1085000000000001e-05, + "num_tokens": 609537.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.002, + "grad_norm": 0.26326534152030945, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 1.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.002, + "grad_norm": 0.2711552381515503, + "learning_rate": 1.1075e-05, + "num_tokens": 609719.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0616, + "grad_norm": 1.274338960647583, + "learning_rate": 1.107e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0016, + "grad_norm": 0.2114490568637848, + "learning_rate": 1.1065000000000002e-05, + "num_tokens": 610322.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0682, + "grad_norm": 1.6731176376342773, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0813, + "grad_norm": 1.9255222082138062, + "learning_rate": 1.1055e-05, + "num_tokens": 611346.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0016, + "grad_norm": 0.21615324914455414, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0688, + "grad_norm": 1.5003544092178345, + "learning_rate": 1.1045000000000002e-05, + "num_tokens": 611949.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0014, + "grad_norm": 0.18165816366672516, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 1.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0583, + "grad_norm": 1.9068502187728882, + "learning_rate": 1.1035e-05, + "num_tokens": 612552.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0015, + "grad_norm": 0.18768055737018585, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 1.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0014, + "grad_norm": 0.1921229511499405, + "learning_rate": 1.1025000000000002e-05, + "num_tokens": 612734.0, + "mean_token_accuracy": 1.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0015, + "grad_norm": 0.19404935836791992, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0619, + "grad_norm": 1.6527628898620605, + "learning_rate": 1.1015e-05, + "num_tokens": 613337.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0413, + "grad_norm": 1.2340315580368042, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0015, + "grad_norm": 0.19533570110797882, + "learning_rate": 1.1005e-05, + "num_tokens": 613940.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0639, + "grad_norm": 1.0601844787597656, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0014, + "grad_norm": 0.18472979962825775, + "learning_rate": 1.0995e-05, + "num_tokens": 614543.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0642, + "grad_norm": 1.2736060619354248, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0581, + "grad_norm": 1.4980621337890625, + "learning_rate": 1.0985e-05, + "num_tokens": 615567.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0489, + "grad_norm": 1.1453659534454346, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0625, + "grad_norm": 1.6183781623840332, + "learning_rate": 1.0975e-05, + "num_tokens": 616591.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0018, + "grad_norm": 0.24508105218410492, + "learning_rate": 1.097e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 1.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.002, + "grad_norm": 0.2894340753555298, + "learning_rate": 1.0965000000000001e-05, + "num_tokens": 616773.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0394, + "grad_norm": 1.3422820568084717, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0018, + "grad_norm": 0.26346835494041443, + "learning_rate": 1.0955e-05, + "num_tokens": 617376.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.002, + "grad_norm": 0.28616681694984436, + "learning_rate": 1.095e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0629, + "grad_norm": 1.515001654624939, + "learning_rate": 1.0945000000000001e-05, + "num_tokens": 617979.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0429, + "grad_norm": 1.3231642246246338, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0498, + "grad_norm": 1.3477892875671387, + "learning_rate": 1.0935e-05, + "num_tokens": 619003.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0686, + "grad_norm": 1.4584791660308838, + "learning_rate": 1.093e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0021, + "grad_norm": 0.29815393686294556, + "learning_rate": 1.0925000000000001e-05, + "num_tokens": 619606.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.087, + "grad_norm": 2.550358533859253, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0021, + "grad_norm": 0.3024434447288513, + "learning_rate": 1.0915e-05, + "num_tokens": 620209.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0559, + "grad_norm": 1.8500303030014038, + "learning_rate": 1.091e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0024, + "grad_norm": 0.3702225685119629, + "learning_rate": 1.0905000000000001e-05, + "num_tokens": 620812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0782, + "grad_norm": 1.9154956340789795, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0613, + "grad_norm": 1.6961833238601685, + "learning_rate": 1.0895e-05, + "num_tokens": 621836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0022, + "grad_norm": 0.3193221390247345, + "learning_rate": 1.089e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 1.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0025, + "grad_norm": 0.36297887563705444, + "learning_rate": 1.0885000000000002e-05, + "num_tokens": 622018.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0022, + "grad_norm": 0.3415636420249939, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0408, + "grad_norm": 1.2334237098693848, + "learning_rate": 1.0875e-05, + "num_tokens": 622621.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.002, + "grad_norm": 0.2912217974662781, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 1.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.043, + "grad_norm": 1.9397270679473877, + "learning_rate": 1.0865000000000002e-05, + "num_tokens": 623224.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0395, + "grad_norm": 1.2516388893127441, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0018, + "grad_norm": 0.24329343438148499, + "learning_rate": 1.0855e-05, + "num_tokens": 623827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0019, + "grad_norm": 0.2603467106819153, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0588, + "grad_norm": 1.736319661140442, + "learning_rate": 1.0845e-05, + "num_tokens": 624430.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0014, + "grad_norm": 0.19694186747074127, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 1.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0015, + "grad_norm": 0.20471760630607605, + "learning_rate": 1.0835e-05, + "num_tokens": 624612.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0016, + "grad_norm": 0.21806074678897858, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0014, + "grad_norm": 0.19000421464443207, + "learning_rate": 1.0825e-05, + "num_tokens": 624794.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0516, + "grad_norm": 1.4601935148239136, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0713, + "grad_norm": 2.011367082595825, + "learning_rate": 1.0815e-05, + "num_tokens": 625818.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0012, + "grad_norm": 0.15841880440711975, + "learning_rate": 1.081e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 1.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0711, + "grad_norm": 2.100233793258667, + "learning_rate": 1.0805e-05, + "num_tokens": 626421.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0012, + "grad_norm": 0.1544499695301056, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 1.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0012, + "grad_norm": 0.15288732945919037, + "learning_rate": 1.0794999999999999e-05, + "num_tokens": 626603.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0379, + "grad_norm": 1.210354208946228, + "learning_rate": 1.079e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.042, + "grad_norm": 1.1011019945144653, + "learning_rate": 1.0785000000000001e-05, + "num_tokens": 627627.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0646, + "grad_norm": 1.4223557710647583, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0011, + "grad_norm": 0.14515887200832367, + "learning_rate": 1.0775e-05, + "num_tokens": 628230.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0012, + "grad_norm": 0.14745497703552246, + "learning_rate": 1.077e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0013, + "grad_norm": 0.16342398524284363, + "learning_rate": 1.0765000000000001e-05, + "num_tokens": 628412.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0711, + "grad_norm": 1.4518134593963623, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0719, + "grad_norm": 1.6602455377578735, + "learning_rate": 1.0755e-05, + "num_tokens": 629436.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0676, + "grad_norm": 1.4668382406234741, + "learning_rate": 1.075e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0675, + "grad_norm": 1.7040259838104248, + "learning_rate": 1.0745000000000001e-05, + "num_tokens": 630460.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0015, + "grad_norm": 0.2076033502817154, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 1.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0578, + "grad_norm": 1.4224144220352173, + "learning_rate": 1.0735e-05, + "num_tokens": 631063.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0359, + "grad_norm": 1.0415198802947998, + "learning_rate": 1.073e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0549, + "grad_norm": 1.3249598741531372, + "learning_rate": 1.0725000000000001e-05, + "num_tokens": 632087.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27819395065307617, + "learning_rate": 1.072e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 1.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.002, + "grad_norm": 0.28510138392448425, + "learning_rate": 1.0715e-05, + "num_tokens": 632269.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0023, + "grad_norm": 0.33845254778862, + "learning_rate": 1.071e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0022, + "grad_norm": 0.3247784972190857, + "learning_rate": 1.0705000000000002e-05, + "num_tokens": 632451.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.043, + "grad_norm": 1.0912247896194458, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0578, + "grad_norm": 1.1355180740356445, + "learning_rate": 1.0695e-05, + "num_tokens": 633475.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0024, + "grad_norm": 0.3479563593864441, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 1.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0022, + "grad_norm": 0.3158959448337555, + "learning_rate": 1.0685e-05, + "num_tokens": 633657.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": 0.0428, + "grad_norm": 1.4031771421432495, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.055, + "grad_norm": 1.2979878187179565, + "learning_rate": 1.0675e-05, + "num_tokens": 634681.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0021, + "grad_norm": 0.30659785866737366, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 1.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0407, + "grad_norm": 1.1281771659851074, + "learning_rate": 1.0665e-05, + "num_tokens": 635284.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0021, + "grad_norm": 0.3046596050262451, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 1.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.002, + "grad_norm": 0.29561498761177063, + "learning_rate": 1.0655e-05, + "num_tokens": 635466.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.042, + "grad_norm": 1.11528480052948, + "learning_rate": 1.065e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0603, + "grad_norm": 1.633859634399414, + "learning_rate": 1.0645e-05, + "num_tokens": 636490.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0617, + "grad_norm": 1.5089678764343262, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0393, + "grad_norm": 1.644981026649475, + "learning_rate": 1.0634999999999999e-05, + "num_tokens": 637514.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0548, + "grad_norm": 1.4219714403152466, + "learning_rate": 1.063e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0022, + "grad_norm": 0.3061341941356659, + "learning_rate": 1.0625e-05, + "num_tokens": 638117.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0439, + "grad_norm": 1.3055533170700073, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0844, + "grad_norm": 2.4925858974456787, + "learning_rate": 1.0615000000000003e-05, + "num_tokens": 639141.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0409, + "grad_norm": 1.2279584407806396, + "learning_rate": 1.061e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0023, + "grad_norm": 0.3406059145927429, + "learning_rate": 1.0605000000000001e-05, + "num_tokens": 639744.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0024, + "grad_norm": 0.3423788249492645, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 1.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0758, + "grad_norm": 2.193775177001953, + "learning_rate": 1.0595000000000003e-05, + "num_tokens": 640347.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0397, + "grad_norm": 1.2993077039718628, + "learning_rate": 1.059e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0025, + "grad_norm": 0.37831318378448486, + "learning_rate": 1.0585000000000001e-05, + "num_tokens": 640950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0695, + "grad_norm": 1.9661240577697754, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0514, + "grad_norm": 1.348526954650879, + "learning_rate": 1.0575000000000001e-05, + "num_tokens": 641974.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0422, + "grad_norm": 1.4465380907058716, + "learning_rate": 1.057e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0792, + "grad_norm": 1.823074460029602, + "learning_rate": 1.0565000000000001e-05, + "num_tokens": 642998.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0707, + "grad_norm": 1.9393905401229858, + "learning_rate": 1.056e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": 0.0765, + "grad_norm": 2.4390299320220947, + "learning_rate": 1.0555000000000001e-05, + "num_tokens": 644022.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0377, + "grad_norm": 1.2858082056045532, + "learning_rate": 1.055e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.036, + "grad_norm": 1.1891300678253174, + "learning_rate": 1.0545000000000002e-05, + "num_tokens": 645046.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0596, + "grad_norm": 1.3432769775390625, + "learning_rate": 1.054e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0409, + "grad_norm": 1.3289687633514404, + "learning_rate": 1.0535000000000002e-05, + "num_tokens": 646070.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0703, + "grad_norm": 1.9712656736373901, + "learning_rate": 1.053e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0674, + "grad_norm": 1.360931634902954, + "learning_rate": 1.0525e-05, + "num_tokens": 647094.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0712, + "grad_norm": 1.7070671319961548, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0553, + "grad_norm": 1.2540414333343506, + "learning_rate": 1.0515000000000002e-05, + "num_tokens": 648118.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0462, + "grad_norm": 1.0861750841140747, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0606, + "grad_norm": 1.2730586528778076, + "learning_rate": 1.0505e-05, + "num_tokens": 649142.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0678, + "grad_norm": 1.881486177444458, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0358, + "grad_norm": 1.520228385925293, + "learning_rate": 1.0495000000000002e-05, + "num_tokens": 650166.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0102, + "grad_norm": 1.2519571781158447, + "learning_rate": 1.049e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0603, + "grad_norm": 1.7512507438659668, + "learning_rate": 1.0485e-05, + "num_tokens": 650769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": 0.0422, + "grad_norm": 1.2172882556915283, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0111, + "grad_norm": 1.2125916481018066, + "learning_rate": 1.0475000000000002e-05, + "num_tokens": 651372.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0104, + "grad_norm": 1.187291145324707, + "learning_rate": 1.047e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.067, + "grad_norm": 1.5227930545806885, + "learning_rate": 1.0465e-05, + "num_tokens": 651975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0512, + "grad_norm": 1.1584064960479736, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0585, + "grad_norm": 1.5452741384506226, + "learning_rate": 1.0455000000000002e-05, + "num_tokens": 652999.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.037, + "grad_norm": 1.2185399532318115, + "learning_rate": 1.045e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0073, + "grad_norm": 0.8913355469703674, + "learning_rate": 1.0445e-05, + "num_tokens": 653602.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.1718, + "grad_norm": 3.605719804763794, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0545, + "grad_norm": 0.8743512034416199, + "learning_rate": 1.0435000000000003e-05, + "num_tokens": 654626.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0601, + "grad_norm": 1.5047037601470947, + "learning_rate": 1.043e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0048, + "grad_norm": 0.6472101211547852, + "learning_rate": 1.0425000000000001e-05, + "num_tokens": 655229.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0819, + "grad_norm": 2.8786802291870117, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0722, + "grad_norm": 1.6400585174560547, + "learning_rate": 1.0415000000000001e-05, + "num_tokens": 656253.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0379, + "grad_norm": 1.1578104496002197, + "learning_rate": 1.041e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0651, + "grad_norm": 1.9455623626708984, + "learning_rate": 1.0405000000000001e-05, + "num_tokens": 657277.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0588, + "grad_norm": 1.3513238430023193, + "learning_rate": 1.04e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0584, + "grad_norm": 2.0099873542785645, + "learning_rate": 1.0395000000000001e-05, + "num_tokens": 658301.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0422, + "grad_norm": 1.1260371208190918, + "learning_rate": 1.039e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.1567, + "grad_norm": 4.341492652893066, + "learning_rate": 1.0385000000000001e-05, + "num_tokens": 659325.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0034, + "grad_norm": 0.5023797154426575, + "learning_rate": 1.038e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 1.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0515, + "grad_norm": 1.3957620859146118, + "learning_rate": 1.0375000000000001e-05, + "num_tokens": 659928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.074, + "grad_norm": 1.8058022260665894, + "learning_rate": 1.037e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0683, + "grad_norm": 1.5976930856704712, + "learning_rate": 1.0365e-05, + "num_tokens": 660952.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.042, + "grad_norm": 1.2127424478530884, + "learning_rate": 1.036e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0525, + "grad_norm": 1.24295175075531, + "learning_rate": 1.0355000000000002e-05, + "num_tokens": 661976.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0553, + "grad_norm": 1.3676091432571411, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0043, + "grad_norm": 0.5990502834320068, + "learning_rate": 1.0345e-05, + "num_tokens": 662579.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0651, + "grad_norm": 1.8467062711715698, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0035, + "grad_norm": 0.4997740089893341, + "learning_rate": 1.0335000000000002e-05, + "num_tokens": 663182.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0564, + "grad_norm": 0.9972801804542542, + "learning_rate": 1.033e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0587, + "grad_norm": 1.6288121938705444, + "learning_rate": 1.0325e-05, + "num_tokens": 664206.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0387, + "grad_norm": 1.0264148712158203, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0044, + "grad_norm": 0.6445260047912598, + "learning_rate": 1.0315000000000002e-05, + "num_tokens": 664809.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0379, + "grad_norm": 1.0764647722244263, + "learning_rate": 1.031e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0483, + "grad_norm": 1.6414856910705566, + "learning_rate": 1.0305e-05, + "num_tokens": 665833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0392, + "grad_norm": 1.0878779888153076, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0721, + "grad_norm": 1.8314939737319946, + "learning_rate": 1.0295000000000002e-05, + "num_tokens": 666857.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0404, + "grad_norm": 1.2442834377288818, + "learning_rate": 1.029e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0344, + "grad_norm": 1.0829095840454102, + "learning_rate": 1.0285e-05, + "num_tokens": 667881.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.005, + "grad_norm": 0.7069464921951294, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 1.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0701, + "grad_norm": 1.8649088144302368, + "learning_rate": 1.0275000000000002e-05, + "num_tokens": 668484.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0445, + "grad_norm": 1.5859991312026978, + "learning_rate": 1.027e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0617, + "grad_norm": 1.400742530822754, + "learning_rate": 1.0265e-05, + "num_tokens": 669508.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0527, + "grad_norm": 1.4805254936218262, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0052, + "grad_norm": 0.7180629968643188, + "learning_rate": 1.0255000000000001e-05, + "num_tokens": 670111.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0404, + "grad_norm": 1.3597116470336914, + "learning_rate": 1.025e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0054, + "grad_norm": 0.7400949597358704, + "learning_rate": 1.0245000000000001e-05, + "num_tokens": 670714.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0049, + "grad_norm": 0.6836004853248596, + "learning_rate": 1.024e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 1.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0736, + "grad_norm": 2.3706512451171875, + "learning_rate": 1.0235000000000001e-05, + "num_tokens": 671317.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0045, + "grad_norm": 0.6252732872962952, + "learning_rate": 1.023e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 1.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0538, + "grad_norm": 1.2009153366088867, + "learning_rate": 1.0225000000000001e-05, + "num_tokens": 671920.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0032, + "grad_norm": 0.4667681157588959, + "learning_rate": 1.022e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 1.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0612, + "grad_norm": 1.505027413368225, + "learning_rate": 1.0215000000000001e-05, + "num_tokens": 672523.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0551, + "grad_norm": 1.3336291313171387, + "learning_rate": 1.021e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0402, + "grad_norm": 1.1181267499923706, + "learning_rate": 1.0205e-05, + "num_tokens": 673547.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0633, + "grad_norm": 1.5764997005462646, + "learning_rate": 1.02e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0024, + "grad_norm": 0.33718812465667725, + "learning_rate": 1.0195000000000001e-05, + "num_tokens": 674150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0683, + "grad_norm": 1.428412675857544, + "learning_rate": 1.019e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441157937049866, + "learning_rate": 1.0185e-05, + "num_tokens": 674753.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0023, + "grad_norm": 0.33211714029312134, + "learning_rate": 1.018e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 1.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0022, + "grad_norm": 0.3089843988418579, + "learning_rate": 1.0175000000000002e-05, + "num_tokens": 674935.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0635, + "grad_norm": 1.286823034286499, + "learning_rate": 1.017e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0419, + "grad_norm": 1.0465713739395142, + "learning_rate": 1.0165e-05, + "num_tokens": 675959.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0019, + "grad_norm": 0.27270686626434326, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 1.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0614, + "grad_norm": 1.536331295967102, + "learning_rate": 1.0155000000000002e-05, + "num_tokens": 676562.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0521, + "grad_norm": 1.3282392024993896, + "learning_rate": 1.015e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0573, + "grad_norm": 1.3458013534545898, + "learning_rate": 1.0145e-05, + "num_tokens": 677586.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0607, + "grad_norm": 1.5142616033554077, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0427, + "grad_norm": 1.3866674900054932, + "learning_rate": 1.0135000000000002e-05, + "num_tokens": 678610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0652, + "grad_norm": 1.3013007640838623, + "learning_rate": 1.013e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0021, + "grad_norm": 0.2967868447303772, + "learning_rate": 1.0125e-05, + "num_tokens": 679213.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.002, + "grad_norm": 0.2977685332298279, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0022, + "grad_norm": 0.3109460473060608, + "learning_rate": 1.0115000000000002e-05, + "num_tokens": 679395.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0563, + "grad_norm": 1.1927019357681274, + "learning_rate": 1.011e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0019, + "grad_norm": 0.27015697956085205, + "learning_rate": 1.0105e-05, + "num_tokens": 679998.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.054, + "grad_norm": 1.8113130331039429, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0688, + "grad_norm": 1.6508032083511353, + "learning_rate": 1.0095e-05, + "num_tokens": 681022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0502, + "grad_norm": 1.1528620719909668, + "learning_rate": 1.009e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29425331950187683, + "learning_rate": 1.0085000000000001e-05, + "num_tokens": 681625.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0641, + "grad_norm": 1.702049732208252, + "learning_rate": 1.008e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.036, + "grad_norm": 1.1969891786575317, + "learning_rate": 1.0075000000000001e-05, + "num_tokens": 682649.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0022, + "grad_norm": 0.31679248809814453, + "learning_rate": 1.007e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 1.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0403, + "grad_norm": 1.1920922994613647, + "learning_rate": 1.0065000000000001e-05, + "num_tokens": 683252.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0544, + "grad_norm": 1.1415454149246216, + "learning_rate": 1.006e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0028, + "grad_norm": 0.42351487278938293, + "learning_rate": 1.0055000000000001e-05, + "num_tokens": 683855.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0466, + "grad_norm": 1.6247456073760986, + "learning_rate": 1.005e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0524, + "grad_norm": 1.2605568170547485, + "learning_rate": 1.0045e-05, + "num_tokens": 684879.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.057, + "grad_norm": 1.483921766281128, + "learning_rate": 1.004e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0029, + "grad_norm": 0.420865923166275, + "learning_rate": 1.0035000000000001e-05, + "num_tokens": 685482.0, + "mean_token_accuracy": 1.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0483, + "grad_norm": 1.9411001205444336, + "learning_rate": 1.003e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0418, + "grad_norm": 1.1357734203338623, + "learning_rate": 1.0025e-05, + "num_tokens": 686506.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0031, + "grad_norm": 0.4264874756336212, + "learning_rate": 1.002e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 1.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0628, + "grad_norm": 1.5096089839935303, + "learning_rate": 1.0015000000000002e-05, + "num_tokens": 687109.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.003, + "grad_norm": 0.41657188534736633, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0, + "step": 2000 + }, + { + "loss": 0.0028, + "grad_norm": 0.3918426036834717, + "learning_rate": 1.0005e-05, + "num_tokens": 687291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0005, + "step": 2001 + }, + { + "loss": 0.0524, + "grad_norm": 1.1938209533691406, + "learning_rate": 1e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.001, + "step": 2002 + }, + { + "loss": 0.0027, + "grad_norm": 0.3788990080356598, + "learning_rate": 9.995000000000002e-06, + "num_tokens": 687894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0015, + "step": 2003 + }, + { + "loss": 0.0025, + "grad_norm": 0.3577810227870941, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 2004 + }, + { + "loss": 0.0024, + "grad_norm": 0.3305366039276123, + "learning_rate": 9.985000000000002e-06, + "num_tokens": 688076.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0025, + "step": 2005 + }, + { + "loss": 0.002, + "grad_norm": 0.277047336101532, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 688167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.003, + "step": 2006 + }, + { + "loss": 0.0019, + "grad_norm": 0.2567979693412781, + "learning_rate": 9.975000000000002e-06, + "num_tokens": 688258.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0035, + "step": 2007 + }, + { + "loss": 0.0682, + "grad_norm": 1.844512701034546, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.004, + "step": 2008 + }, + { + "loss": 0.0487, + "grad_norm": 1.2499569654464722, + "learning_rate": 9.965000000000002e-06, + "num_tokens": 689282.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0045, + "step": 2009 + }, + { + "loss": 0.0432, + "grad_norm": 1.2406448125839233, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.005, + "step": 2010 + }, + { + "loss": 0.0804, + "grad_norm": 1.833058476448059, + "learning_rate": 9.955000000000002e-06, + "num_tokens": 690306.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0055, + "step": 2011 + }, + { + "loss": 0.0464, + "grad_norm": 1.3244189023971558, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.006, + "step": 2012 + }, + { + "loss": 0.0416, + "grad_norm": 1.044066309928894, + "learning_rate": 9.945e-06, + "num_tokens": 691330.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0065, + "step": 2013 + }, + { + "loss": 0.0646, + "grad_norm": 1.5272581577301025, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.007, + "step": 2014 + }, + { + "loss": 0.0401, + "grad_norm": 1.2222588062286377, + "learning_rate": 9.935e-06, + "num_tokens": 692354.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0075, + "step": 2015 + }, + { + "loss": 0.0833, + "grad_norm": 2.3880302906036377, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.008, + "step": 2016 + }, + { + "loss": 0.0661, + "grad_norm": 1.666345238685608, + "learning_rate": 9.925e-06, + "num_tokens": 693378.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0085, + "step": 2017 + }, + { + "loss": 0.061, + "grad_norm": 1.2552286386489868, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 2018 + }, + { + "loss": 0.0022, + "grad_norm": 0.2978605329990387, + "learning_rate": 9.915e-06, + "num_tokens": 693981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0095, + "step": 2019 + }, + { + "loss": 0.0419, + "grad_norm": 1.1351749897003174, + "learning_rate": 9.91e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.01, + "step": 2020 + }, + { + "loss": 0.0028, + "grad_norm": 0.4339805245399475, + "learning_rate": 9.905000000000001e-06, + "num_tokens": 694584.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0105, + "step": 2021 + }, + { + "loss": 0.0027, + "grad_norm": 0.3737834393978119, + "learning_rate": 9.9e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 2022 + }, + { + "loss": 0.0724, + "grad_norm": 1.6216633319854736, + "learning_rate": 9.895000000000001e-06, + "num_tokens": 695187.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0115, + "step": 2023 + }, + { + "loss": 0.0026, + "grad_norm": 0.38558149337768555, + "learning_rate": 9.89e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 1.0, + "epoch": 1.012, + "step": 2024 + }, + { + "loss": 0.0457, + "grad_norm": 1.2241498231887817, + "learning_rate": 9.885000000000001e-06, + "num_tokens": 695790.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0125, + "step": 2025 + }, + { + "loss": 0.0387, + "grad_norm": 1.4335367679595947, + "learning_rate": 9.88e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.013, + "step": 2026 + }, + { + "loss": 0.0716, + "grad_norm": 1.5836760997772217, + "learning_rate": 9.875000000000001e-06, + "num_tokens": 696814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0135, + "step": 2027 + }, + { + "loss": 0.0419, + "grad_norm": 1.2072887420654297, + "learning_rate": 9.87e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.014, + "step": 2028 + }, + { + "loss": 0.0376, + "grad_norm": 0.9630845189094543, + "learning_rate": 9.865000000000001e-06, + "num_tokens": 697838.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.0145, + "step": 2029 + }, + { + "loss": 0.0562, + "grad_norm": 1.396782636642456, + "learning_rate": 9.86e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.015, + "step": 2030 + }, + { + "loss": 0.0611, + "grad_norm": 1.526076316833496, + "learning_rate": 9.855000000000001e-06, + "num_tokens": 698862.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0155, + "step": 2031 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280098915100098, + "learning_rate": 9.85e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.016, + "step": 2032 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271911025047302, + "learning_rate": 9.845000000000001e-06, + "num_tokens": 699044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0165, + "step": 2033 + }, + { + "loss": 0.0638, + "grad_norm": 1.2341188192367554, + "learning_rate": 9.84e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.017, + "step": 2034 + }, + { + "loss": 0.0386, + "grad_norm": 1.0637688636779785, + "learning_rate": 9.835000000000002e-06, + "num_tokens": 700068.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0175, + "step": 2035 + }, + { + "loss": 0.0036, + "grad_norm": 0.52369225025177, + "learning_rate": 9.83e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 1.0, + "epoch": 1.018, + "step": 2036 + }, + { + "loss": 0.0494, + "grad_norm": 2.351320266723633, + "learning_rate": 9.825000000000002e-06, + "num_tokens": 700671.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0185, + "step": 2037 + }, + { + "loss": 0.0034, + "grad_norm": 0.4984705150127411, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.019, + "step": 2038 + }, + { + "loss": 0.0406, + "grad_norm": 1.5286310911178589, + "learning_rate": 9.815000000000002e-06, + "num_tokens": 701274.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0195, + "step": 2039 + }, + { + "loss": 0.0523, + "grad_norm": 1.7273446321487427, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.02, + "step": 2040 + }, + { + "loss": 0.0033, + "grad_norm": 0.4823690950870514, + "learning_rate": 9.805000000000002e-06, + "num_tokens": 701877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0205, + "step": 2041 + }, + { + "loss": 0.0032, + "grad_norm": 0.4507608711719513, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.021, + "step": 2042 + }, + { + "loss": 0.0703, + "grad_norm": 1.77262544631958, + "learning_rate": 9.795000000000002e-06, + "num_tokens": 702480.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0215, + "step": 2043 + }, + { + "loss": 0.0026, + "grad_norm": 0.3709382116794586, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.022, + "step": 2044 + }, + { + "loss": 0.0683, + "grad_norm": 3.5564355850219727, + "learning_rate": 9.785e-06, + "num_tokens": 703083.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0225, + "step": 2045 + }, + { + "loss": 0.0024, + "grad_norm": 0.3166162967681885, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.023, + "step": 2046 + }, + { + "loss": 0.0022, + "grad_norm": 0.2928009331226349, + "learning_rate": 9.775e-06, + "num_tokens": 703265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0235, + "step": 2047 + }, + { + "loss": 0.0621, + "grad_norm": 1.902612566947937, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.024, + "step": 2048 + }, + { + "loss": 0.0018, + "grad_norm": 0.23954610526561737, + "learning_rate": 9.765e-06, + "num_tokens": 703868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0245, + "step": 2049 + }, + { + "loss": 0.0409, + "grad_norm": 1.3355653285980225, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.025, + "step": 2050 + }, + { + "loss": 0.0705, + "grad_norm": 1.6696054935455322, + "learning_rate": 9.755e-06, + "num_tokens": 704892.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0255, + "step": 2051 + }, + { + "loss": 0.0016, + "grad_norm": 0.22299779951572418, + "learning_rate": 9.75e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.026, + "step": 2052 + }, + { + "loss": 0.0016, + "grad_norm": 0.21063728630542755, + "learning_rate": 9.745e-06, + "num_tokens": 705074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0265, + "step": 2053 + }, + { + "loss": 0.0696, + "grad_norm": 1.6844984292984009, + "learning_rate": 9.74e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.027, + "step": 2054 + }, + { + "loss": 0.0714, + "grad_norm": 1.5383219718933105, + "learning_rate": 9.735e-06, + "num_tokens": 706098.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0275, + "step": 2055 + }, + { + "loss": 0.0015, + "grad_norm": 0.19807161390781403, + "learning_rate": 9.73e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.028, + "step": 2056 + }, + { + "loss": 0.0014, + "grad_norm": 0.19030039012432098, + "learning_rate": 9.725000000000001e-06, + "num_tokens": 706280.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0285, + "step": 2057 + }, + { + "loss": 0.0013, + "grad_norm": 0.16322408616542816, + "learning_rate": 9.72e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 2058 + }, + { + "loss": 0.0014, + "grad_norm": 0.17665083706378937, + "learning_rate": 9.715000000000001e-06, + "num_tokens": 706462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0295, + "step": 2059 + }, + { + "loss": 0.0669, + "grad_norm": 1.8765722513198853, + "learning_rate": 9.71e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.03, + "step": 2060 + }, + { + "loss": 0.0768, + "grad_norm": 1.7586760520935059, + "learning_rate": 9.705000000000001e-06, + "num_tokens": 707486.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0305, + "step": 2061 + }, + { + "loss": 0.0696, + "grad_norm": 1.258619785308838, + "learning_rate": 9.7e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.031, + "step": 2062 + }, + { + "loss": 0.0493, + "grad_norm": 1.2884832620620728, + "learning_rate": 9.695000000000001e-06, + "num_tokens": 708510.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0315, + "step": 2063 + }, + { + "loss": 0.0012, + "grad_norm": 0.15901947021484375, + "learning_rate": 9.69e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.032, + "step": 2064 + }, + { + "loss": 0.0656, + "grad_norm": 1.3002307415008545, + "learning_rate": 9.685000000000001e-06, + "num_tokens": 709113.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0325, + "step": 2065 + }, + { + "loss": 0.0013, + "grad_norm": 0.17090171575546265, + "learning_rate": 9.68e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.033, + "step": 2066 + }, + { + "loss": 0.0013, + "grad_norm": 0.1825355738401413, + "learning_rate": 9.675000000000001e-06, + "num_tokens": 709295.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0335, + "step": 2067 + }, + { + "loss": 0.0459, + "grad_norm": 1.092247724533081, + "learning_rate": 9.67e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.034, + "step": 2068 + }, + { + "loss": 0.0648, + "grad_norm": 1.4761494398117065, + "learning_rate": 9.665000000000001e-06, + "num_tokens": 710319.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.0345, + "step": 2069 + }, + { + "loss": 0.0014, + "grad_norm": 0.1826472133398056, + "learning_rate": 9.66e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 1.0, + "epoch": 1.035, + "step": 2070 + }, + { + "loss": 0.0461, + "grad_norm": 1.338349461555481, + "learning_rate": 9.655000000000002e-06, + "num_tokens": 710922.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0355, + "step": 2071 + }, + { + "loss": 0.0567, + "grad_norm": 1.0566164255142212, + "learning_rate": 9.65e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.036, + "step": 2072 + }, + { + "loss": 0.0015, + "grad_norm": 0.19834326207637787, + "learning_rate": 9.645000000000002e-06, + "num_tokens": 711525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0365, + "step": 2073 + }, + { + "loss": 0.0418, + "grad_norm": 1.210045576095581, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.037, + "step": 2074 + }, + { + "loss": 0.0016, + "grad_norm": 0.22290614247322083, + "learning_rate": 9.635000000000002e-06, + "num_tokens": 712128.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0375, + "step": 2075 + }, + { + "loss": 0.0695, + "grad_norm": 1.4690190553665161, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.038, + "step": 2076 + }, + { + "loss": 0.0016, + "grad_norm": 0.2209765613079071, + "learning_rate": 9.625e-06, + "num_tokens": 712731.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0385, + "step": 2077 + }, + { + "loss": 0.0018, + "grad_norm": 0.23313096165657043, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 2078 + }, + { + "loss": 0.0017, + "grad_norm": 0.23196078836917877, + "learning_rate": 9.615e-06, + "num_tokens": 712913.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0395, + "step": 2079 + }, + { + "loss": 0.0541, + "grad_norm": 1.220723032951355, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.04, + "step": 2080 + }, + { + "loss": 0.0018, + "grad_norm": 0.2516387403011322, + "learning_rate": 9.605e-06, + "num_tokens": 713516.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0405, + "step": 2081 + }, + { + "loss": 0.0424, + "grad_norm": 1.0561903715133667, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.041, + "step": 2082 + }, + { + "loss": 0.0438, + "grad_norm": 1.2110846042633057, + "learning_rate": 9.595e-06, + "num_tokens": 714540.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0415, + "step": 2083 + }, + { + "loss": 0.0018, + "grad_norm": 0.24697688221931458, + "learning_rate": 9.59e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 1.0, + "epoch": 1.042, + "step": 2084 + }, + { + "loss": 0.0388, + "grad_norm": 1.0054850578308105, + "learning_rate": 9.585e-06, + "num_tokens": 715143.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0425, + "step": 2085 + }, + { + "loss": 0.0713, + "grad_norm": 1.8077067136764526, + "learning_rate": 9.58e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.043, + "step": 2086 + }, + { + "loss": 0.0018, + "grad_norm": 0.24363017082214355, + "learning_rate": 9.575e-06, + "num_tokens": 715746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0435, + "step": 2087 + }, + { + "loss": 0.0016, + "grad_norm": 0.21341845393180847, + "learning_rate": 9.57e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 2088 + }, + { + "loss": 0.0391, + "grad_norm": 1.3833376169204712, + "learning_rate": 9.565e-06, + "num_tokens": 716349.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0445, + "step": 2089 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772108793258667, + "learning_rate": 9.56e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.045, + "step": 2090 + }, + { + "loss": 0.002, + "grad_norm": 0.283633828163147, + "learning_rate": 9.555e-06, + "num_tokens": 716952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0455, + "step": 2091 + }, + { + "loss": 0.0728, + "grad_norm": 1.849652647972107, + "learning_rate": 9.55e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.046, + "step": 2092 + }, + { + "loss": 0.0022, + "grad_norm": 0.3161669969558716, + "learning_rate": 9.545000000000001e-06, + "num_tokens": 717555.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0465, + "step": 2093 + }, + { + "loss": 0.0587, + "grad_norm": 1.600858449935913, + "learning_rate": 9.54e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.047, + "step": 2094 + }, + { + "loss": 0.0021, + "grad_norm": 0.2948978543281555, + "learning_rate": 9.535000000000001e-06, + "num_tokens": 718158.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0475, + "step": 2095 + }, + { + "loss": 0.0019, + "grad_norm": 0.27492448687553406, + "learning_rate": 9.53e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 2096 + }, + { + "loss": 0.0382, + "grad_norm": 1.2440471649169922, + "learning_rate": 9.525000000000001e-06, + "num_tokens": 718761.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0485, + "step": 2097 + }, + { + "loss": 0.058, + "grad_norm": 1.5657495260238647, + "learning_rate": 9.52e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 2098 + }, + { + "loss": 0.0018, + "grad_norm": 0.2510983645915985, + "learning_rate": 9.515000000000001e-06, + "num_tokens": 719364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0495, + "step": 2099 + }, + { + "loss": 0.0677, + "grad_norm": 2.6615045070648193, + "learning_rate": 9.51e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.05, + "step": 2100 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355963945388794, + "learning_rate": 9.505000000000001e-06, + "num_tokens": 719967.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0505, + "step": 2101 + }, + { + "loss": 0.0628, + "grad_norm": 1.4263781309127808, + "learning_rate": 9.5e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.051, + "step": 2102 + }, + { + "loss": 0.0384, + "grad_norm": 1.3316160440444946, + "learning_rate": 9.495000000000001e-06, + "num_tokens": 720991.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0515, + "step": 2103 + }, + { + "loss": 0.0413, + "grad_norm": 1.2754371166229248, + "learning_rate": 9.49e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.052, + "step": 2104 + }, + { + "loss": 0.0551, + "grad_norm": 1.9524251222610474, + "learning_rate": 9.485000000000002e-06, + "num_tokens": 722015.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0525, + "step": 2105 + }, + { + "loss": 0.0551, + "grad_norm": 1.5522267818450928, + "learning_rate": 9.48e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.053, + "step": 2106 + }, + { + "loss": 0.0019, + "grad_norm": 0.27614012360572815, + "learning_rate": 9.475000000000002e-06, + "num_tokens": 722618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0535, + "step": 2107 + }, + { + "loss": 0.0606, + "grad_norm": 1.409346103668213, + "learning_rate": 9.47e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.054, + "step": 2108 + }, + { + "loss": 0.0024, + "grad_norm": 0.357972115278244, + "learning_rate": 9.465e-06, + "num_tokens": 723221.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0545, + "step": 2109 + }, + { + "loss": 0.0023, + "grad_norm": 0.3270082175731659, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 2110 + }, + { + "loss": 0.0024, + "grad_norm": 0.3454654812812805, + "learning_rate": 9.455e-06, + "num_tokens": 723403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0555, + "step": 2111 + }, + { + "loss": 0.0024, + "grad_norm": 0.352299302816391, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 2112 + }, + { + "loss": 0.002, + "grad_norm": 0.27746516466140747, + "learning_rate": 9.445e-06, + "num_tokens": 723585.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0565, + "step": 2113 + }, + { + "loss": 0.002, + "grad_norm": 0.2780683636665344, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 2114 + }, + { + "loss": 0.0464, + "grad_norm": 1.5355291366577148, + "learning_rate": 9.435e-06, + "num_tokens": 724188.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0575, + "step": 2115 + }, + { + "loss": 0.0017, + "grad_norm": 0.2329765260219574, + "learning_rate": 9.43e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.058, + "step": 2116 + }, + { + "loss": 0.0015, + "grad_norm": 0.20377217233181, + "learning_rate": 9.425e-06, + "num_tokens": 724370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0585, + "step": 2117 + }, + { + "loss": 0.0014, + "grad_norm": 0.1731068193912506, + "learning_rate": 9.42e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 2118 + }, + { + "loss": 0.0349, + "grad_norm": 1.301210641860962, + "learning_rate": 9.415e-06, + "num_tokens": 724973.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.0594999999999999, + "step": 2119 + }, + { + "loss": 0.0012, + "grad_norm": 0.15070641040802002, + "learning_rate": 9.41e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.06, + "step": 2120 + }, + { + "loss": 0.0012, + "grad_norm": 0.13666701316833496, + "learning_rate": 9.405e-06, + "num_tokens": 725155.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0605, + "step": 2121 + }, + { + "loss": 0.0011, + "grad_norm": 0.13183920085430145, + "learning_rate": 9.4e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 2122 + }, + { + "loss": 0.0735, + "grad_norm": 2.157339096069336, + "learning_rate": 9.395e-06, + "num_tokens": 725758.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.0615, + "step": 2123 + }, + { + "loss": 0.0434, + "grad_norm": 1.441329836845398, + "learning_rate": 9.39e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.062, + "step": 2124 + }, + { + "loss": 0.001, + "grad_norm": 0.11148537695407867, + "learning_rate": 9.385e-06, + "num_tokens": 726361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0625, + "step": 2125 + }, + { + "loss": 0.0363, + "grad_norm": 1.2650766372680664, + "learning_rate": 9.38e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9902152419090271, + "epoch": 1.063, + "step": 2126 + }, + { + "loss": 0.042, + "grad_norm": 1.170820951461792, + "learning_rate": 9.375000000000001e-06, + "num_tokens": 727385.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0635, + "step": 2127 + }, + { + "loss": 0.0375, + "grad_norm": 1.31922447681427, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.064, + "step": 2128 + }, + { + "loss": 0.0009, + "grad_norm": 0.10702881962060928, + "learning_rate": 9.365000000000001e-06, + "num_tokens": 727988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0645, + "step": 2129 + }, + { + "loss": 0.001, + "grad_norm": 0.12134991586208344, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 2130 + }, + { + "loss": 0.001, + "grad_norm": 0.12518537044525146, + "learning_rate": 9.355000000000001e-06, + "num_tokens": 728170.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0655000000000001, + "step": 2131 + }, + { + "loss": 0.0443, + "grad_norm": 1.5640217065811157, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.066, + "step": 2132 + }, + { + "loss": 0.043, + "grad_norm": 1.7402693033218384, + "learning_rate": 9.345000000000001e-06, + "num_tokens": 729194.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0665, + "step": 2133 + }, + { + "loss": 0.0572, + "grad_norm": 1.478943109512329, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 2134 + }, + { + "loss": 0.0436, + "grad_norm": 1.75895357131958, + "learning_rate": 9.335000000000001e-06, + "num_tokens": 730218.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0675, + "step": 2135 + }, + { + "loss": 0.0011, + "grad_norm": 0.14104828238487244, + "learning_rate": 9.33e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.068, + "step": 2136 + }, + { + "loss": 0.0014, + "grad_norm": 0.1940988302230835, + "learning_rate": 9.325000000000001e-06, + "num_tokens": 730400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0685, + "step": 2137 + }, + { + "loss": 0.0012, + "grad_norm": 0.15279027819633484, + "learning_rate": 9.32e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 2138 + }, + { + "loss": 0.0627, + "grad_norm": 1.8744264841079712, + "learning_rate": 9.315000000000001e-06, + "num_tokens": 731003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0695000000000001, + "step": 2139 + }, + { + "loss": 0.045, + "grad_norm": 1.4347468614578247, + "learning_rate": 9.31e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.07, + "step": 2140 + }, + { + "loss": 0.0711, + "grad_norm": 1.9654953479766846, + "learning_rate": 9.305000000000002e-06, + "num_tokens": 732027.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0705, + "step": 2141 + }, + { + "loss": 0.0723, + "grad_norm": 1.851762294769287, + "learning_rate": 9.3e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.071, + "step": 2142 + }, + { + "loss": 0.0397, + "grad_norm": 1.1016762256622314, + "learning_rate": 9.295e-06, + "num_tokens": 733051.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0715, + "step": 2143 + }, + { + "loss": 0.0614, + "grad_norm": 1.278972864151001, + "learning_rate": 9.29e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.072, + "step": 2144 + }, + { + "loss": 0.0578, + "grad_norm": 1.5237491130828857, + "learning_rate": 9.285e-06, + "num_tokens": 734075.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0725, + "step": 2145 + }, + { + "loss": 0.0021, + "grad_norm": 0.29453045129776, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.073, + "step": 2146 + }, + { + "loss": 0.0756, + "grad_norm": 1.90165376663208, + "learning_rate": 9.275e-06, + "num_tokens": 734678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0735, + "step": 2147 + }, + { + "loss": 0.0025, + "grad_norm": 0.3552635610103607, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.074, + "step": 2148 + }, + { + "loss": 0.0615, + "grad_norm": 1.3596733808517456, + "learning_rate": 9.265e-06, + "num_tokens": 735281.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0745, + "step": 2149 + }, + { + "loss": 0.0571, + "grad_norm": 1.0499508380889893, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.075, + "step": 2150 + }, + { + "loss": 0.0593, + "grad_norm": 1.4813532829284668, + "learning_rate": 9.255e-06, + "num_tokens": 736305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0755, + "step": 2151 + }, + { + "loss": 0.0451, + "grad_norm": 1.1956957578659058, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.076, + "step": 2152 + }, + { + "loss": 0.0035, + "grad_norm": 0.5021563172340393, + "learning_rate": 9.245e-06, + "num_tokens": 736908.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0765, + "step": 2153 + }, + { + "loss": 0.0035, + "grad_norm": 0.5023340582847595, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 2154 + }, + { + "loss": 0.0593, + "grad_norm": 1.3515294790267944, + "learning_rate": 9.235e-06, + "num_tokens": 737511.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.0775, + "step": 2155 + }, + { + "loss": 0.0036, + "grad_norm": 0.5020677447319031, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.078, + "step": 2156 + }, + { + "loss": 0.0034, + "grad_norm": 0.4873979985713959, + "learning_rate": 9.225e-06, + "num_tokens": 737693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0785, + "step": 2157 + }, + { + "loss": 0.0582, + "grad_norm": 1.3766424655914307, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.079, + "step": 2158 + }, + { + "loss": 0.0631, + "grad_norm": 1.1943955421447754, + "learning_rate": 9.215e-06, + "num_tokens": 738717.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0795, + "step": 2159 + }, + { + "loss": 0.003, + "grad_norm": 0.43413516879081726, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.08, + "step": 2160 + }, + { + "loss": 0.0031, + "grad_norm": 0.44669783115386963, + "learning_rate": 9.205e-06, + "num_tokens": 738899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0805, + "step": 2161 + }, + { + "loss": 0.0561, + "grad_norm": 1.3388497829437256, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.081, + "step": 2162 + }, + { + "loss": 0.0426, + "grad_norm": 1.8933428525924683, + "learning_rate": 9.195000000000001e-06, + "num_tokens": 739923.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.0815, + "step": 2163 + }, + { + "loss": 0.06, + "grad_norm": 1.3706074953079224, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.082, + "step": 2164 + }, + { + "loss": 0.0621, + "grad_norm": 1.443211555480957, + "learning_rate": 9.185000000000001e-06, + "num_tokens": 740947.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0825, + "step": 2165 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098005950450897, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 1.0, + "epoch": 1.083, + "step": 2166 + }, + { + "loss": 0.06, + "grad_norm": 1.2332003116607666, + "learning_rate": 9.175000000000001e-06, + "num_tokens": 741550.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.0835, + "step": 2167 + }, + { + "loss": 0.0682, + "grad_norm": 1.4077450037002563, + "learning_rate": 9.17e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.084, + "step": 2168 + }, + { + "loss": 0.0584, + "grad_norm": 1.4201141595840454, + "learning_rate": 9.165000000000001e-06, + "num_tokens": 742574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0845, + "step": 2169 + }, + { + "loss": 0.0024, + "grad_norm": 0.3220980167388916, + "learning_rate": 9.16e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.085, + "step": 2170 + }, + { + "loss": 0.0571, + "grad_norm": 1.3979272842407227, + "learning_rate": 9.155000000000001e-06, + "num_tokens": 743177.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0855, + "step": 2171 + }, + { + "loss": 0.0572, + "grad_norm": 1.6924889087677002, + "learning_rate": 9.15e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.086, + "step": 2172 + }, + { + "loss": 0.0708, + "grad_norm": 1.7350118160247803, + "learning_rate": 9.145000000000001e-06, + "num_tokens": 744201.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.0865, + "step": 2173 + }, + { + "loss": 0.0024, + "grad_norm": 0.3453267812728882, + "learning_rate": 9.14e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 1.0, + "epoch": 1.087, + "step": 2174 + }, + { + "loss": 0.0028, + "grad_norm": 0.3845599293708801, + "learning_rate": 9.135e-06, + "num_tokens": 744383.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0875, + "step": 2175 + }, + { + "loss": 0.0023, + "grad_norm": 0.32928982377052307, + "learning_rate": 9.13e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.088, + "step": 2176 + }, + { + "loss": 0.0025, + "grad_norm": 0.3593277335166931, + "learning_rate": 9.125e-06, + "num_tokens": 744565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0885, + "step": 2177 + }, + { + "loss": 0.0447, + "grad_norm": 1.6252307891845703, + "learning_rate": 9.12e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.089, + "step": 2178 + }, + { + "loss": 0.0664, + "grad_norm": 1.3326979875564575, + "learning_rate": 9.115e-06, + "num_tokens": 745589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.0895, + "step": 2179 + }, + { + "loss": 0.0713, + "grad_norm": 2.490602493286133, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.09, + "step": 2180 + }, + { + "loss": 0.0577, + "grad_norm": 1.2613682746887207, + "learning_rate": 9.105e-06, + "num_tokens": 746613.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0905, + "step": 2181 + }, + { + "loss": 0.0604, + "grad_norm": 1.8400533199310303, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.091, + "step": 2182 + }, + { + "loss": 0.0546, + "grad_norm": 1.577405571937561, + "learning_rate": 9.095e-06, + "num_tokens": 747637.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0915, + "step": 2183 + }, + { + "loss": 0.1758, + "grad_norm": 3.9485361576080322, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 1.092, + "step": 2184 + }, + { + "loss": 0.0407, + "grad_norm": 1.4230077266693115, + "learning_rate": 9.085e-06, + "num_tokens": 748661.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.0925, + "step": 2185 + }, + { + "loss": 0.0024, + "grad_norm": 0.3441873788833618, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 1.0, + "epoch": 1.093, + "step": 2186 + }, + { + "loss": 0.0574, + "grad_norm": 1.059336543083191, + "learning_rate": 9.075e-06, + "num_tokens": 749264.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.0935, + "step": 2187 + }, + { + "loss": 0.0022, + "grad_norm": 0.3150666058063507, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.094, + "step": 2188 + }, + { + "loss": 0.0401, + "grad_norm": 1.1904288530349731, + "learning_rate": 9.065e-06, + "num_tokens": 749867.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.0945, + "step": 2189 + }, + { + "loss": 0.0024, + "grad_norm": 0.3425971567630768, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 1.0, + "epoch": 1.095, + "step": 2190 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606398403644562, + "learning_rate": 9.055e-06, + "num_tokens": 750049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0955, + "step": 2191 + }, + { + "loss": 0.0025, + "grad_norm": 0.3754805028438568, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.096, + "step": 2192 + }, + { + "loss": 0.0512, + "grad_norm": 1.1577214002609253, + "learning_rate": 9.045e-06, + "num_tokens": 750652.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0965, + "step": 2193 + }, + { + "loss": 0.0022, + "grad_norm": 0.3151845633983612, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 1.0, + "epoch": 1.097, + "step": 2194 + }, + { + "loss": 0.0386, + "grad_norm": 1.1814777851104736, + "learning_rate": 9.035e-06, + "num_tokens": 751255.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.0975, + "step": 2195 + }, + { + "loss": 0.002, + "grad_norm": 0.2940640151500702, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.098, + "step": 2196 + }, + { + "loss": 0.0021, + "grad_norm": 0.3114289939403534, + "learning_rate": 9.025e-06, + "num_tokens": 751437.0, + "mean_token_accuracy": 1.0, + "epoch": 1.0985, + "step": 2197 + }, + { + "loss": 0.0587, + "grad_norm": 1.5265949964523315, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.099, + "step": 2198 + }, + { + "loss": 0.0584, + "grad_norm": 1.182391881942749, + "learning_rate": 9.015000000000001e-06, + "num_tokens": 752461.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.0995, + "step": 2199 + }, + { + "loss": 0.0018, + "grad_norm": 0.2633577287197113, + "learning_rate": 9.01e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1, + "step": 2200 + }, + { + "loss": 0.0019, + "grad_norm": 0.26985710859298706, + "learning_rate": 9.005000000000001e-06, + "num_tokens": 752643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1005, + "step": 2201 + }, + { + "loss": 0.0017, + "grad_norm": 0.23652321100234985, + "learning_rate": 9e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 2202 + }, + { + "loss": 0.0578, + "grad_norm": 1.4083077907562256, + "learning_rate": 8.995000000000001e-06, + "num_tokens": 753246.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1015, + "step": 2203 + }, + { + "loss": 0.0595, + "grad_norm": 1.427134394645691, + "learning_rate": 8.99e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.102, + "step": 2204 + }, + { + "loss": 0.0539, + "grad_norm": 1.3228328227996826, + "learning_rate": 8.985000000000001e-06, + "num_tokens": 754270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1025, + "step": 2205 + }, + { + "loss": 0.0015, + "grad_norm": 0.2133481651544571, + "learning_rate": 8.98e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.103, + "step": 2206 + }, + { + "loss": 0.0821, + "grad_norm": 2.5287461280822754, + "learning_rate": 8.975e-06, + "num_tokens": 754873.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1035, + "step": 2207 + }, + { + "loss": 0.0623, + "grad_norm": 1.4041988849639893, + "learning_rate": 8.97e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.104, + "step": 2208 + }, + { + "loss": 0.0409, + "grad_norm": 1.1858478784561157, + "learning_rate": 8.965e-06, + "num_tokens": 755897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1045, + "step": 2209 + }, + { + "loss": 0.0583, + "grad_norm": 1.219450831413269, + "learning_rate": 8.96e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.105, + "step": 2210 + }, + { + "loss": 0.0414, + "grad_norm": 1.1721197366714478, + "learning_rate": 8.955e-06, + "num_tokens": 756921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1055, + "step": 2211 + }, + { + "loss": 0.053, + "grad_norm": 1.277345895767212, + "learning_rate": 8.95e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.106, + "step": 2212 + }, + { + "loss": 0.0625, + "grad_norm": 1.3503938913345337, + "learning_rate": 8.945e-06, + "num_tokens": 757945.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1065, + "step": 2213 + }, + { + "loss": 0.002, + "grad_norm": 0.30203038454055786, + "learning_rate": 8.94e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.107, + "step": 2214 + }, + { + "loss": 0.0022, + "grad_norm": 0.35174328088760376, + "learning_rate": 8.935e-06, + "num_tokens": 758127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1075, + "step": 2215 + }, + { + "loss": 0.0423, + "grad_norm": 1.168192744255066, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.108, + "step": 2216 + }, + { + "loss": 0.0764, + "grad_norm": 1.3265845775604248, + "learning_rate": 8.925e-06, + "num_tokens": 759151.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1085, + "step": 2217 + }, + { + "loss": 0.1833, + "grad_norm": 3.288583755493164, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 1.109, + "step": 2218 + }, + { + "loss": 0.0029, + "grad_norm": 0.44568195939064026, + "learning_rate": 8.915e-06, + "num_tokens": 759754.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1095, + "step": 2219 + }, + { + "loss": 0.0027, + "grad_norm": 0.409576416015625, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 2220 + }, + { + "loss": 0.0033, + "grad_norm": 0.4960649907588959, + "learning_rate": 8.905e-06, + "num_tokens": 759936.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1105, + "step": 2221 + }, + { + "loss": 0.1642, + "grad_norm": 2.6913421154022217, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.111, + "step": 2222 + }, + { + "loss": 0.0715, + "grad_norm": 1.5037237405776978, + "learning_rate": 8.895e-06, + "num_tokens": 760960.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1115, + "step": 2223 + }, + { + "loss": 0.0562, + "grad_norm": 1.152312159538269, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.112, + "step": 2224 + }, + { + "loss": 0.0025, + "grad_norm": 0.3840191960334778, + "learning_rate": 8.885e-06, + "num_tokens": 761563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1125, + "step": 2225 + }, + { + "loss": 0.0421, + "grad_norm": 1.0708019733428955, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.113, + "step": 2226 + }, + { + "loss": 0.0713, + "grad_norm": 1.2928557395935059, + "learning_rate": 8.875e-06, + "num_tokens": 762587.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1135, + "step": 2227 + }, + { + "loss": 0.0622, + "grad_norm": 1.3733391761779785, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.114, + "step": 2228 + }, + { + "loss": 0.0029, + "grad_norm": 0.42555150389671326, + "learning_rate": 8.865e-06, + "num_tokens": 763190.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1145, + "step": 2229 + }, + { + "loss": 0.0457, + "grad_norm": 1.3084357976913452, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.115, + "step": 2230 + }, + { + "loss": 0.0027, + "grad_norm": 0.3956111967563629, + "learning_rate": 8.855e-06, + "num_tokens": 763793.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1155, + "step": 2231 + }, + { + "loss": 0.066, + "grad_norm": 1.3650692701339722, + "learning_rate": 8.85e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.116, + "step": 2232 + }, + { + "loss": 0.0029, + "grad_norm": 0.4088021516799927, + "learning_rate": 8.845000000000001e-06, + "num_tokens": 764396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1165, + "step": 2233 + }, + { + "loss": 0.0397, + "grad_norm": 1.2808146476745605, + "learning_rate": 8.84e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.117, + "step": 2234 + }, + { + "loss": 0.0027, + "grad_norm": 0.3983195126056671, + "learning_rate": 8.835000000000001e-06, + "num_tokens": 764999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1175, + "step": 2235 + }, + { + "loss": 0.0423, + "grad_norm": 1.1593605279922485, + "learning_rate": 8.83e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1179999999999999, + "step": 2236 + }, + { + "loss": 0.0649, + "grad_norm": 1.5087552070617676, + "learning_rate": 8.825000000000001e-06, + "num_tokens": 766023.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1185, + "step": 2237 + }, + { + "loss": 0.0683, + "grad_norm": 1.5192102193832397, + "learning_rate": 8.82e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.119, + "step": 2238 + }, + { + "loss": 0.0588, + "grad_norm": 1.386413812637329, + "learning_rate": 8.815e-06, + "num_tokens": 767047.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1195, + "step": 2239 + }, + { + "loss": 0.14, + "grad_norm": 2.439119815826416, + "learning_rate": 8.81e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.12, + "step": 2240 + }, + { + "loss": 0.0029, + "grad_norm": 0.4191952049732208, + "learning_rate": 8.805e-06, + "num_tokens": 767650.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1205, + "step": 2241 + }, + { + "loss": 0.0397, + "grad_norm": 1.169542908668518, + "learning_rate": 8.8e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.121, + "step": 2242 + }, + { + "loss": 0.0584, + "grad_norm": 1.2895692586898804, + "learning_rate": 8.795e-06, + "num_tokens": 768674.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1215, + "step": 2243 + }, + { + "loss": 0.0582, + "grad_norm": 1.274592638015747, + "learning_rate": 8.79e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1219999999999999, + "step": 2244 + }, + { + "loss": 0.0032, + "grad_norm": 0.44238153100013733, + "learning_rate": 8.785e-06, + "num_tokens": 769277.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1225, + "step": 2245 + }, + { + "loss": 0.0032, + "grad_norm": 0.4488213360309601, + "learning_rate": 8.78e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.123, + "step": 2246 + }, + { + "loss": 0.003, + "grad_norm": 0.43088752031326294, + "learning_rate": 8.775e-06, + "num_tokens": 769459.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1235, + "step": 2247 + }, + { + "loss": 0.0366, + "grad_norm": 1.2531421184539795, + "learning_rate": 8.77e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.124, + "step": 2248 + }, + { + "loss": 0.0029, + "grad_norm": 0.40329650044441223, + "learning_rate": 8.765e-06, + "num_tokens": 770062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1245, + "step": 2249 + }, + { + "loss": 0.0527, + "grad_norm": 1.196119785308838, + "learning_rate": 8.76e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.125, + "step": 2250 + }, + { + "loss": 0.0468, + "grad_norm": 1.571480393409729, + "learning_rate": 8.755e-06, + "num_tokens": 771086.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1255, + "step": 2251 + }, + { + "loss": 0.0024, + "grad_norm": 0.32946687936782837, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.126, + "step": 2252 + }, + { + "loss": 0.0023, + "grad_norm": 0.3213779628276825, + "learning_rate": 8.745000000000002e-06, + "num_tokens": 771268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1265, + "step": 2253 + }, + { + "loss": 0.0381, + "grad_norm": 1.36893630027771, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.127, + "step": 2254 + }, + { + "loss": 0.0023, + "grad_norm": 0.3214550316333771, + "learning_rate": 8.735000000000002e-06, + "num_tokens": 771871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1275, + "step": 2255 + }, + { + "loss": 0.0389, + "grad_norm": 1.1307684183120728, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1280000000000001, + "step": 2256 + }, + { + "loss": 0.0021, + "grad_norm": 0.30145928263664246, + "learning_rate": 8.725000000000002e-06, + "num_tokens": 772474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1285, + "step": 2257 + }, + { + "loss": 0.0018, + "grad_norm": 0.24611108005046844, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 2258 + }, + { + "loss": 0.0652, + "grad_norm": 1.5593312978744507, + "learning_rate": 8.715e-06, + "num_tokens": 773077.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1295, + "step": 2259 + }, + { + "loss": 0.1724, + "grad_norm": 3.1925134658813477, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.13, + "step": 2260 + }, + { + "loss": 0.0016, + "grad_norm": 0.2210361361503601, + "learning_rate": 8.705e-06, + "num_tokens": 773680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1305, + "step": 2261 + }, + { + "loss": 0.044, + "grad_norm": 1.1579885482788086, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.131, + "step": 2262 + }, + { + "loss": 0.0812, + "grad_norm": 2.0770068168640137, + "learning_rate": 8.695e-06, + "num_tokens": 774704.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1315, + "step": 2263 + }, + { + "loss": 0.0376, + "grad_norm": 1.1654012203216553, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1320000000000001, + "step": 2264 + }, + { + "loss": 0.0017, + "grad_norm": 0.22535240650177002, + "learning_rate": 8.685e-06, + "num_tokens": 775307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1325, + "step": 2265 + }, + { + "loss": 0.0017, + "grad_norm": 0.2348785251379013, + "learning_rate": 8.68e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 2266 + }, + { + "loss": 0.0017, + "grad_norm": 0.24279342591762543, + "learning_rate": 8.675e-06, + "num_tokens": 775489.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1335, + "step": 2267 + }, + { + "loss": 0.0748, + "grad_norm": 1.5453892946243286, + "learning_rate": 8.67e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.134, + "step": 2268 + }, + { + "loss": 0.0015, + "grad_norm": 0.20795051753520966, + "learning_rate": 8.665000000000001e-06, + "num_tokens": 776092.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1345, + "step": 2269 + }, + { + "loss": 0.0016, + "grad_norm": 0.21314096450805664, + "learning_rate": 8.66e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 2270 + }, + { + "loss": 0.0016, + "grad_norm": 0.22147318720817566, + "learning_rate": 8.655000000000001e-06, + "num_tokens": 776274.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1355, + "step": 2271 + }, + { + "loss": 0.0511, + "grad_norm": 1.1325373649597168, + "learning_rate": 8.65e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1360000000000001, + "step": 2272 + }, + { + "loss": 0.0014, + "grad_norm": 0.18845656514167786, + "learning_rate": 8.645000000000001e-06, + "num_tokens": 776877.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1365, + "step": 2273 + }, + { + "loss": 0.0013, + "grad_norm": 0.16952817142009735, + "learning_rate": 8.64e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 2274 + }, + { + "loss": 0.0621, + "grad_norm": 1.329026222229004, + "learning_rate": 8.635000000000001e-06, + "num_tokens": 777480.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1375, + "step": 2275 + }, + { + "loss": 0.0416, + "grad_norm": 1.105779767036438, + "learning_rate": 8.63e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.138, + "step": 2276 + }, + { + "loss": 0.0467, + "grad_norm": 1.1847842931747437, + "learning_rate": 8.625000000000001e-06, + "num_tokens": 778504.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1385, + "step": 2277 + }, + { + "loss": 0.0414, + "grad_norm": 1.0636855363845825, + "learning_rate": 8.62e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.139, + "step": 2278 + }, + { + "loss": 0.058, + "grad_norm": 1.3789916038513184, + "learning_rate": 8.615000000000001e-06, + "num_tokens": 779528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1395, + "step": 2279 + }, + { + "loss": 0.0649, + "grad_norm": 1.1419354677200317, + "learning_rate": 8.61e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1400000000000001, + "step": 2280 + }, + { + "loss": 0.0014, + "grad_norm": 0.19384142756462097, + "learning_rate": 8.605000000000001e-06, + "num_tokens": 780131.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1405, + "step": 2281 + }, + { + "loss": 0.0015, + "grad_norm": 0.19773858785629272, + "learning_rate": 8.6e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 2282 + }, + { + "loss": 0.0557, + "grad_norm": 1.190521001815796, + "learning_rate": 8.595000000000002e-06, + "num_tokens": 780734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1415, + "step": 2283 + }, + { + "loss": 0.0017, + "grad_norm": 0.23638860881328583, + "learning_rate": 8.59e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 1.0, + "epoch": 1.142, + "step": 2284 + }, + { + "loss": 0.0017, + "grad_norm": 0.24933819472789764, + "learning_rate": 8.585000000000002e-06, + "num_tokens": 780916.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1425, + "step": 2285 + }, + { + "loss": 0.0017, + "grad_norm": 0.22720065712928772, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 2286 + }, + { + "loss": 0.0416, + "grad_norm": 1.214958667755127, + "learning_rate": 8.575000000000002e-06, + "num_tokens": 781519.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1435, + "step": 2287 + }, + { + "loss": 0.054, + "grad_norm": 0.9985194206237793, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.144, + "step": 2288 + }, + { + "loss": 0.0017, + "grad_norm": 0.24114187061786652, + "learning_rate": 8.565000000000002e-06, + "num_tokens": 782122.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1445, + "step": 2289 + }, + { + "loss": 0.0574, + "grad_norm": 1.4530028104782104, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.145, + "step": 2290 + }, + { + "loss": 0.0018, + "grad_norm": 0.2544173002243042, + "learning_rate": 8.555e-06, + "num_tokens": 782725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1455, + "step": 2291 + }, + { + "loss": 0.0017, + "grad_norm": 0.23475930094718933, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 2292 + }, + { + "loss": 0.0708, + "grad_norm": 1.619470477104187, + "learning_rate": 8.545e-06, + "num_tokens": 783328.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.1465, + "step": 2293 + }, + { + "loss": 0.0019, + "grad_norm": 0.2572467029094696, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 1.0, + "epoch": 1.147, + "step": 2294 + }, + { + "loss": 0.0019, + "grad_norm": 0.26701951026916504, + "learning_rate": 8.535e-06, + "num_tokens": 783510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1475, + "step": 2295 + }, + { + "loss": 0.0471, + "grad_norm": 1.147359848022461, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.148, + "step": 2296 + }, + { + "loss": 0.0485, + "grad_norm": 1.0665885210037231, + "learning_rate": 8.525e-06, + "num_tokens": 784534.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1485, + "step": 2297 + }, + { + "loss": 0.0017, + "grad_norm": 0.23322324454784393, + "learning_rate": 8.52e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.149, + "step": 2298 + }, + { + "loss": 0.0667, + "grad_norm": 1.4317374229431152, + "learning_rate": 8.515e-06, + "num_tokens": 785137.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1495, + "step": 2299 + }, + { + "loss": 0.0651, + "grad_norm": 1.4495528936386108, + "learning_rate": 8.51e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.15, + "step": 2300 + }, + { + "loss": 0.0018, + "grad_norm": 0.24990759789943695, + "learning_rate": 8.505e-06, + "num_tokens": 785740.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1505, + "step": 2301 + }, + { + "loss": 0.0673, + "grad_norm": 1.3833082914352417, + "learning_rate": 8.5e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.151, + "step": 2302 + }, + { + "loss": 0.0384, + "grad_norm": 1.0650711059570312, + "learning_rate": 8.495e-06, + "num_tokens": 786764.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1515, + "step": 2303 + }, + { + "loss": 0.0017, + "grad_norm": 0.2362237423658371, + "learning_rate": 8.49e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.152, + "step": 2304 + }, + { + "loss": 0.0362, + "grad_norm": 1.2261658906936646, + "learning_rate": 8.485000000000001e-06, + "num_tokens": 787367.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1525, + "step": 2305 + }, + { + "loss": 0.0021, + "grad_norm": 0.285277396440506, + "learning_rate": 8.48e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 1.0, + "epoch": 1.153, + "step": 2306 + }, + { + "loss": 0.0018, + "grad_norm": 0.24331547319889069, + "learning_rate": 8.475000000000001e-06, + "num_tokens": 787549.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1535, + "step": 2307 + }, + { + "loss": 0.057, + "grad_norm": 1.260392427444458, + "learning_rate": 8.47e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.154, + "step": 2308 + }, + { + "loss": 0.002, + "grad_norm": 0.26841071248054504, + "learning_rate": 8.465000000000001e-06, + "num_tokens": 788152.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1545, + "step": 2309 + }, + { + "loss": 0.0018, + "grad_norm": 0.25016698241233826, + "learning_rate": 8.46e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 2310 + }, + { + "loss": 0.002, + "grad_norm": 0.2738337218761444, + "learning_rate": 8.455000000000001e-06, + "num_tokens": 788334.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1555, + "step": 2311 + }, + { + "loss": 0.0017, + "grad_norm": 0.2311965376138687, + "learning_rate": 8.45e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 2312 + }, + { + "loss": 0.0608, + "grad_norm": 1.6522681713104248, + "learning_rate": 8.445000000000001e-06, + "num_tokens": 788937.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1565, + "step": 2313 + }, + { + "loss": 0.0595, + "grad_norm": 1.3370118141174316, + "learning_rate": 8.44e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.157, + "step": 2314 + }, + { + "loss": 0.0706, + "grad_norm": 1.5185800790786743, + "learning_rate": 8.435000000000001e-06, + "num_tokens": 789961.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1575, + "step": 2315 + }, + { + "loss": 0.0015, + "grad_norm": 0.20058579742908478, + "learning_rate": 8.43e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 1.0, + "epoch": 1.158, + "step": 2316 + }, + { + "loss": 0.0736, + "grad_norm": 1.6871758699417114, + "learning_rate": 8.425000000000001e-06, + "num_tokens": 790564.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.1585, + "step": 2317 + }, + { + "loss": 0.0684, + "grad_norm": 1.7638912200927734, + "learning_rate": 8.42e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.159, + "step": 2318 + }, + { + "loss": 0.0017, + "grad_norm": 0.23336097598075867, + "learning_rate": 8.415000000000002e-06, + "num_tokens": 791167.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1595, + "step": 2319 + }, + { + "loss": 0.0596, + "grad_norm": 1.3170890808105469, + "learning_rate": 8.41e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.16, + "step": 2320 + }, + { + "loss": 0.0566, + "grad_norm": 1.8501343727111816, + "learning_rate": 8.405000000000002e-06, + "num_tokens": 792191.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1605, + "step": 2321 + }, + { + "loss": 0.0679, + "grad_norm": 1.3065072298049927, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.161, + "step": 2322 + }, + { + "loss": 0.0577, + "grad_norm": 1.3374840021133423, + "learning_rate": 8.395e-06, + "num_tokens": 793215.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1615, + "step": 2323 + }, + { + "loss": 0.0651, + "grad_norm": 1.2627785205841064, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.162, + "step": 2324 + }, + { + "loss": 0.0589, + "grad_norm": 1.1249433755874634, + "learning_rate": 8.385e-06, + "num_tokens": 794239.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1625, + "step": 2325 + }, + { + "loss": 0.0022, + "grad_norm": 0.31153878569602966, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.163, + "step": 2326 + }, + { + "loss": 0.0376, + "grad_norm": 1.2043869495391846, + "learning_rate": 8.375e-06, + "num_tokens": 794842.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.1635, + "step": 2327 + }, + { + "loss": 0.0024, + "grad_norm": 0.3410504162311554, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 1.0, + "epoch": 1.164, + "step": 2328 + }, + { + "loss": 0.0497, + "grad_norm": 1.3358232975006104, + "learning_rate": 8.365e-06, + "num_tokens": 795445.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1645, + "step": 2329 + }, + { + "loss": 0.062, + "grad_norm": 1.3019129037857056, + "learning_rate": 8.36e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.165, + "step": 2330 + }, + { + "loss": 0.1411, + "grad_norm": 3.1003713607788086, + "learning_rate": 8.355e-06, + "num_tokens": 796469.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.1655, + "step": 2331 + }, + { + "loss": 0.0675, + "grad_norm": 1.4928791522979736, + "learning_rate": 8.35e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.166, + "step": 2332 + }, + { + "loss": 0.0032, + "grad_norm": 0.47702810168266296, + "learning_rate": 8.345e-06, + "num_tokens": 797072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1665, + "step": 2333 + }, + { + "loss": 0.0486, + "grad_norm": 1.189456820487976, + "learning_rate": 8.34e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.167, + "step": 2334 + }, + { + "loss": 0.0033, + "grad_norm": 0.5152677893638611, + "learning_rate": 8.335e-06, + "num_tokens": 797675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1675, + "step": 2335 + }, + { + "loss": 0.0463, + "grad_norm": 1.3805276155471802, + "learning_rate": 8.33e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.168, + "step": 2336 + }, + { + "loss": 0.0653, + "grad_norm": 1.7025351524353027, + "learning_rate": 8.325e-06, + "num_tokens": 798699.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1685, + "step": 2337 + }, + { + "loss": 0.0031, + "grad_norm": 0.44580474495887756, + "learning_rate": 8.32e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 1.0, + "epoch": 1.169, + "step": 2338 + }, + { + "loss": 0.0462, + "grad_norm": 1.3915964365005493, + "learning_rate": 8.315000000000001e-06, + "num_tokens": 799302.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1695, + "step": 2339 + }, + { + "loss": 0.0689, + "grad_norm": 1.3206253051757812, + "learning_rate": 8.31e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.17, + "step": 2340 + }, + { + "loss": 0.0616, + "grad_norm": 1.0774954557418823, + "learning_rate": 8.305000000000001e-06, + "num_tokens": 800326.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1705, + "step": 2341 + }, + { + "loss": 0.0036, + "grad_norm": 0.5280348658561707, + "learning_rate": 8.3e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 1.0, + "epoch": 1.171, + "step": 2342 + }, + { + "loss": 0.0534, + "grad_norm": 1.1514171361923218, + "learning_rate": 8.295000000000001e-06, + "num_tokens": 800929.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1715, + "step": 2343 + }, + { + "loss": 0.0034, + "grad_norm": 0.4936150014400482, + "learning_rate": 8.29e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 1.0, + "epoch": 1.172, + "step": 2344 + }, + { + "loss": 0.0411, + "grad_norm": 1.091706395149231, + "learning_rate": 8.285000000000001e-06, + "num_tokens": 801532.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1724999999999999, + "step": 2345 + }, + { + "loss": 0.0633, + "grad_norm": 1.2277299165725708, + "learning_rate": 8.28e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.173, + "step": 2346 + }, + { + "loss": 0.0032, + "grad_norm": 0.4532278776168823, + "learning_rate": 8.275000000000001e-06, + "num_tokens": 802135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1735, + "step": 2347 + }, + { + "loss": 0.0033, + "grad_norm": 0.467818021774292, + "learning_rate": 8.27e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.174, + "step": 2348 + }, + { + "loss": 0.0528, + "grad_norm": 1.7821072340011597, + "learning_rate": 8.265000000000001e-06, + "num_tokens": 802738.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1745, + "step": 2349 + }, + { + "loss": 0.0415, + "grad_norm": 1.4086565971374512, + "learning_rate": 8.26e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.175, + "step": 2350 + }, + { + "loss": 0.045, + "grad_norm": 1.1930326223373413, + "learning_rate": 8.255000000000001e-06, + "num_tokens": 803762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.1755, + "step": 2351 + }, + { + "loss": 0.0028, + "grad_norm": 0.4077257215976715, + "learning_rate": 8.25e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 1.0, + "epoch": 1.176, + "step": 2352 + }, + { + "loss": 0.0535, + "grad_norm": 1.0156196355819702, + "learning_rate": 8.245000000000002e-06, + "num_tokens": 804365.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1764999999999999, + "step": 2353 + }, + { + "loss": 0.0544, + "grad_norm": 1.701621413230896, + "learning_rate": 8.24e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.177, + "step": 2354 + }, + { + "loss": 0.0408, + "grad_norm": 1.3804023265838623, + "learning_rate": 8.235e-06, + "num_tokens": 805389.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1775, + "step": 2355 + }, + { + "loss": 0.0538, + "grad_norm": 1.4935331344604492, + "learning_rate": 8.23e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.178, + "step": 2356 + }, + { + "loss": 0.0031, + "grad_norm": 0.46967241168022156, + "learning_rate": 8.225e-06, + "num_tokens": 805992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1785, + "step": 2357 + }, + { + "loss": 0.003, + "grad_norm": 0.4181312620639801, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.179, + "step": 2358 + }, + { + "loss": 0.003, + "grad_norm": 0.4292071461677551, + "learning_rate": 8.215e-06, + "num_tokens": 806174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1795, + "step": 2359 + }, + { + "loss": 0.0025, + "grad_norm": 0.3606574833393097, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.18, + "step": 2360 + }, + { + "loss": 0.0384, + "grad_norm": 1.0812703371047974, + "learning_rate": 8.205e-06, + "num_tokens": 806777.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1804999999999999, + "step": 2361 + }, + { + "loss": 0.0025, + "grad_norm": 0.36413413286209106, + "learning_rate": 8.2e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 1.0, + "epoch": 1.181, + "step": 2362 + }, + { + "loss": 0.0632, + "grad_norm": 1.3525351285934448, + "learning_rate": 8.195e-06, + "num_tokens": 807380.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.1815, + "step": 2363 + }, + { + "loss": 0.0021, + "grad_norm": 0.29519718885421753, + "learning_rate": 8.19e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.182, + "step": 2364 + }, + { + "loss": 0.002, + "grad_norm": 0.28825369477272034, + "learning_rate": 8.185e-06, + "num_tokens": 807562.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1825, + "step": 2365 + }, + { + "loss": 0.0364, + "grad_norm": 1.0907576084136963, + "learning_rate": 8.18e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.183, + "step": 2366 + }, + { + "loss": 0.0682, + "grad_norm": 1.3050081729888916, + "learning_rate": 8.175e-06, + "num_tokens": 808586.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1835, + "step": 2367 + }, + { + "loss": 0.0424, + "grad_norm": 1.141483187675476, + "learning_rate": 8.17e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.184, + "step": 2368 + }, + { + "loss": 0.0019, + "grad_norm": 0.26355233788490295, + "learning_rate": 8.165e-06, + "num_tokens": 809189.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1844999999999999, + "step": 2369 + }, + { + "loss": 0.0744, + "grad_norm": 1.7785593271255493, + "learning_rate": 8.16e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.185, + "step": 2370 + }, + { + "loss": 0.0657, + "grad_norm": 1.3623268604278564, + "learning_rate": 8.155e-06, + "num_tokens": 810213.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1855, + "step": 2371 + }, + { + "loss": 0.0549, + "grad_norm": 1.1436368227005005, + "learning_rate": 8.15e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 2372 + }, + { + "loss": 0.0539, + "grad_norm": 1.2383182048797607, + "learning_rate": 8.145e-06, + "num_tokens": 811237.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1865, + "step": 2373 + }, + { + "loss": 0.0018, + "grad_norm": 0.24816246330738068, + "learning_rate": 8.14e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.187, + "step": 2374 + }, + { + "loss": 0.0409, + "grad_norm": 1.240695834159851, + "learning_rate": 8.135000000000001e-06, + "num_tokens": 811840.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1875, + "step": 2375 + }, + { + "loss": 0.0364, + "grad_norm": 0.927349328994751, + "learning_rate": 8.13e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.188, + "step": 2376 + }, + { + "loss": 0.002, + "grad_norm": 0.28636854887008667, + "learning_rate": 8.125000000000001e-06, + "num_tokens": 812443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1885, + "step": 2377 + }, + { + "loss": 0.0021, + "grad_norm": 0.3085651397705078, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 2378 + }, + { + "loss": 0.0733, + "grad_norm": 1.627233862876892, + "learning_rate": 8.115000000000001e-06, + "num_tokens": 813046.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1895, + "step": 2379 + }, + { + "loss": 0.0523, + "grad_norm": 1.2803730964660645, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.19, + "step": 2380 + }, + { + "loss": 0.0358, + "grad_norm": 1.134440302848816, + "learning_rate": 8.105000000000001e-06, + "num_tokens": 814070.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.1905000000000001, + "step": 2381 + }, + { + "loss": 0.062, + "grad_norm": 1.7024178504943848, + "learning_rate": 8.1e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.191, + "step": 2382 + }, + { + "loss": 0.0555, + "grad_norm": 1.755904197692871, + "learning_rate": 8.095000000000001e-06, + "num_tokens": 815094.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.1915, + "step": 2383 + }, + { + "loss": 0.0028, + "grad_norm": 0.4056146442890167, + "learning_rate": 8.09e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 1.0, + "epoch": 1.192, + "step": 2384 + }, + { + "loss": 0.0415, + "grad_norm": 1.3847079277038574, + "learning_rate": 8.085000000000001e-06, + "num_tokens": 815697.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1925, + "step": 2385 + }, + { + "loss": 0.041, + "grad_norm": 1.05851149559021, + "learning_rate": 8.08e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.193, + "step": 2386 + }, + { + "loss": 0.0683, + "grad_norm": 1.5797926187515259, + "learning_rate": 8.075000000000001e-06, + "num_tokens": 816721.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1935, + "step": 2387 + }, + { + "loss": 0.003, + "grad_norm": 0.44755682349205017, + "learning_rate": 8.07e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 1.0, + "epoch": 1.194, + "step": 2388 + }, + { + "loss": 0.0035, + "grad_norm": 0.5333588719367981, + "learning_rate": 8.065e-06, + "num_tokens": 816903.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1945000000000001, + "step": 2389 + }, + { + "loss": 0.0034, + "grad_norm": 0.5025861263275146, + "learning_rate": 8.06e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 2390 + }, + { + "loss": 0.0657, + "grad_norm": 1.9265213012695312, + "learning_rate": 8.055e-06, + "num_tokens": 817506.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.1955, + "step": 2391 + }, + { + "loss": 0.0029, + "grad_norm": 0.4326709508895874, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.196, + "step": 2392 + }, + { + "loss": 0.0385, + "grad_norm": 1.282583236694336, + "learning_rate": 8.045e-06, + "num_tokens": 818109.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.1965, + "step": 2393 + }, + { + "loss": 0.048, + "grad_norm": 1.7246921062469482, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 2394 + }, + { + "loss": 0.0529, + "grad_norm": 1.3816536664962769, + "learning_rate": 8.035e-06, + "num_tokens": 819133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1975, + "step": 2395 + }, + { + "loss": 0.0025, + "grad_norm": 0.36934202909469604, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 1.0, + "epoch": 1.198, + "step": 2396 + }, + { + "loss": 0.0701, + "grad_norm": 1.844415307044983, + "learning_rate": 8.025e-06, + "num_tokens": 819736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.1985000000000001, + "step": 2397 + }, + { + "loss": 0.0026, + "grad_norm": 0.3918537199497223, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 1.0, + "epoch": 1.199, + "step": 2398 + }, + { + "loss": 0.0025, + "grad_norm": 0.3629172444343567, + "learning_rate": 8.015e-06, + "num_tokens": 819918.0, + "mean_token_accuracy": 1.0, + "epoch": 1.1995, + "step": 2399 + }, + { + "loss": 0.0593, + "grad_norm": 1.3562273979187012, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2, + "step": 2400 + }, + { + "loss": 0.0415, + "grad_norm": 1.1191670894622803, + "learning_rate": 8.005e-06, + "num_tokens": 820942.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2005, + "step": 2401 + }, + { + "loss": 0.0021, + "grad_norm": 0.3028194308280945, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 1.0, + "epoch": 1.201, + "step": 2402 + }, + { + "loss": 0.0021, + "grad_norm": 0.3161010444164276, + "learning_rate": 7.995e-06, + "num_tokens": 821124.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2015, + "step": 2403 + }, + { + "loss": 0.0631, + "grad_norm": 1.4275634288787842, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.202, + "step": 2404 + }, + { + "loss": 0.0018, + "grad_norm": 0.2525792121887207, + "learning_rate": 7.985e-06, + "num_tokens": 821727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2025000000000001, + "step": 2405 + }, + { + "loss": 0.0576, + "grad_norm": 1.2019566297531128, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.203, + "step": 2406 + }, + { + "loss": 0.0019, + "grad_norm": 0.28433406352996826, + "learning_rate": 7.975e-06, + "num_tokens": 822330.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2035, + "step": 2407 + }, + { + "loss": 0.0018, + "grad_norm": 0.26680925488471985, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 2408 + }, + { + "loss": 0.0523, + "grad_norm": 1.5135900974273682, + "learning_rate": 7.965e-06, + "num_tokens": 822933.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2045, + "step": 2409 + }, + { + "loss": 0.0595, + "grad_norm": 1.425874948501587, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.205, + "step": 2410 + }, + { + "loss": 0.0688, + "grad_norm": 1.7353657484054565, + "learning_rate": 7.955000000000001e-06, + "num_tokens": 823957.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2055, + "step": 2411 + }, + { + "loss": 0.0016, + "grad_norm": 0.22734731435775757, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.206, + "step": 2412 + }, + { + "loss": 0.0016, + "grad_norm": 0.22473861277103424, + "learning_rate": 7.945000000000001e-06, + "num_tokens": 824139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2065, + "step": 2413 + }, + { + "loss": 0.0016, + "grad_norm": 0.23369428515434265, + "learning_rate": 7.94e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 2414 + }, + { + "loss": 0.0018, + "grad_norm": 0.25014567375183105, + "learning_rate": 7.935000000000001e-06, + "num_tokens": 824321.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2075, + "step": 2415 + }, + { + "loss": 0.0701, + "grad_norm": 1.4806315898895264, + "learning_rate": 7.93e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.208, + "step": 2416 + }, + { + "loss": 0.0015, + "grad_norm": 0.1993637979030609, + "learning_rate": 7.925000000000001e-06, + "num_tokens": 824924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2085, + "step": 2417 + }, + { + "loss": 0.0548, + "grad_norm": 1.2813140153884888, + "learning_rate": 7.92e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.209, + "step": 2418 + }, + { + "loss": 0.0552, + "grad_norm": 1.2722525596618652, + "learning_rate": 7.915000000000001e-06, + "num_tokens": 825948.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2095, + "step": 2419 + }, + { + "loss": 0.0013, + "grad_norm": 0.17925392091274261, + "learning_rate": 7.91e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 1.0, + "epoch": 1.21, + "step": 2420 + }, + { + "loss": 0.0013, + "grad_norm": 0.18519414961338043, + "learning_rate": 7.905e-06, + "num_tokens": 826130.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2105, + "step": 2421 + }, + { + "loss": 0.041, + "grad_norm": 1.3869478702545166, + "learning_rate": 7.9e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.211, + "step": 2422 + }, + { + "loss": 0.0013, + "grad_norm": 0.1751483976840973, + "learning_rate": 7.895e-06, + "num_tokens": 826733.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2115, + "step": 2423 + }, + { + "loss": 0.05, + "grad_norm": 1.0098025798797607, + "learning_rate": 7.89e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.212, + "step": 2424 + }, + { + "loss": 0.0605, + "grad_norm": 1.3178874254226685, + "learning_rate": 7.885e-06, + "num_tokens": 827757.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2125, + "step": 2425 + }, + { + "loss": 0.0013, + "grad_norm": 0.18827441334724426, + "learning_rate": 7.88e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 1.0, + "epoch": 1.213, + "step": 2426 + }, + { + "loss": 0.064, + "grad_norm": 1.4484566450119019, + "learning_rate": 7.875e-06, + "num_tokens": 828360.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2135, + "step": 2427 + }, + { + "loss": 0.0014, + "grad_norm": 0.19540052115917206, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 1.0, + "epoch": 1.214, + "step": 2428 + }, + { + "loss": 0.0623, + "grad_norm": 1.3592177629470825, + "learning_rate": 7.865e-06, + "num_tokens": 828963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2145, + "step": 2429 + }, + { + "loss": 0.0014, + "grad_norm": 0.20412060618400574, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.215, + "step": 2430 + }, + { + "loss": 0.0617, + "grad_norm": 1.755582332611084, + "learning_rate": 7.855e-06, + "num_tokens": 829566.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2155, + "step": 2431 + }, + { + "loss": 0.0631, + "grad_norm": 1.2380058765411377, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.216, + "step": 2432 + }, + { + "loss": 0.0375, + "grad_norm": 1.3119670152664185, + "learning_rate": 7.845e-06, + "num_tokens": 830590.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2165, + "step": 2433 + }, + { + "loss": 0.0015, + "grad_norm": 0.22137387096881866, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 1.0, + "epoch": 1.217, + "step": 2434 + }, + { + "loss": 0.0017, + "grad_norm": 0.2416553795337677, + "learning_rate": 7.835e-06, + "num_tokens": 830772.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2175, + "step": 2435 + }, + { + "loss": 0.0015, + "grad_norm": 0.21708650887012482, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 2436 + }, + { + "loss": 0.0016, + "grad_norm": 0.23922832310199738, + "learning_rate": 7.825e-06, + "num_tokens": 830954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2185, + "step": 2437 + }, + { + "loss": 0.0016, + "grad_norm": 0.2385343313217163, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 2438 + }, + { + "loss": 0.065, + "grad_norm": 1.4742591381072998, + "learning_rate": 7.815e-06, + "num_tokens": 831557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2195, + "step": 2439 + }, + { + "loss": 0.0016, + "grad_norm": 0.2341725379228592, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 1.0, + "epoch": 1.22, + "step": 2440 + }, + { + "loss": 0.0615, + "grad_norm": 1.4791371822357178, + "learning_rate": 7.805e-06, + "num_tokens": 832160.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2205, + "step": 2441 + }, + { + "loss": 0.048, + "grad_norm": 1.601716160774231, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.221, + "step": 2442 + }, + { + "loss": 0.0014, + "grad_norm": 0.19947591423988342, + "learning_rate": 7.795e-06, + "num_tokens": 832763.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2215, + "step": 2443 + }, + { + "loss": 0.0801, + "grad_norm": 1.753954291343689, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.222, + "step": 2444 + }, + { + "loss": 0.0015, + "grad_norm": 0.21398615837097168, + "learning_rate": 7.785000000000001e-06, + "num_tokens": 833366.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2225, + "step": 2445 + }, + { + "loss": 0.0655, + "grad_norm": 1.799574851989746, + "learning_rate": 7.78e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.223, + "step": 2446 + }, + { + "loss": 0.0438, + "grad_norm": 1.332261085510254, + "learning_rate": 7.775000000000001e-06, + "num_tokens": 834390.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2235, + "step": 2447 + }, + { + "loss": 0.044, + "grad_norm": 1.238344430923462, + "learning_rate": 7.77e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.224, + "step": 2448 + }, + { + "loss": 0.0015, + "grad_norm": 0.2137579768896103, + "learning_rate": 7.765000000000001e-06, + "num_tokens": 834993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2245, + "step": 2449 + }, + { + "loss": 0.0438, + "grad_norm": 1.1821973323822021, + "learning_rate": 7.76e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.225, + "step": 2450 + }, + { + "loss": 0.0562, + "grad_norm": 1.4905529022216797, + "learning_rate": 7.755000000000001e-06, + "num_tokens": 836017.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2255, + "step": 2451 + }, + { + "loss": 0.0015, + "grad_norm": 0.21731820702552795, + "learning_rate": 7.75e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 1.0, + "epoch": 1.226, + "step": 2452 + }, + { + "loss": 0.0017, + "grad_norm": 0.25909724831581116, + "learning_rate": 7.745e-06, + "num_tokens": 836199.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2265, + "step": 2453 + }, + { + "loss": 0.0016, + "grad_norm": 0.22781187295913696, + "learning_rate": 7.74e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 2454 + }, + { + "loss": 0.0016, + "grad_norm": 0.24323998391628265, + "learning_rate": 7.735e-06, + "num_tokens": 836381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2275, + "step": 2455 + }, + { + "loss": 0.0594, + "grad_norm": 1.5349161624908447, + "learning_rate": 7.73e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.228, + "step": 2456 + }, + { + "loss": 0.0017, + "grad_norm": 0.24151335656642914, + "learning_rate": 7.725e-06, + "num_tokens": 836984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2285, + "step": 2457 + }, + { + "loss": 0.0016, + "grad_norm": 0.23347225785255432, + "learning_rate": 7.72e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 2458 + }, + { + "loss": 0.0017, + "grad_norm": 0.24232612550258636, + "learning_rate": 7.715e-06, + "num_tokens": 837166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2295, + "step": 2459 + }, + { + "loss": 0.0016, + "grad_norm": 0.23151801526546478, + "learning_rate": 7.71e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 2460 + }, + { + "loss": 0.0586, + "grad_norm": 1.4122602939605713, + "learning_rate": 7.705e-06, + "num_tokens": 837769.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2305, + "step": 2461 + }, + { + "loss": 0.0014, + "grad_norm": 0.19469626247882843, + "learning_rate": 7.7e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.231, + "step": 2462 + }, + { + "loss": 0.0637, + "grad_norm": 1.675697684288025, + "learning_rate": 7.695e-06, + "num_tokens": 838372.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2315, + "step": 2463 + }, + { + "loss": 0.0013, + "grad_norm": 0.17535777390003204, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.232, + "step": 2464 + }, + { + "loss": 0.0549, + "grad_norm": 1.1719900369644165, + "learning_rate": 7.685e-06, + "num_tokens": 838975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2325, + "step": 2465 + }, + { + "loss": 0.0013, + "grad_norm": 0.16398227214813232, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.233, + "step": 2466 + }, + { + "loss": 0.0674, + "grad_norm": 1.7502342462539673, + "learning_rate": 7.675e-06, + "num_tokens": 839578.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.2335, + "step": 2467 + }, + { + "loss": 0.0013, + "grad_norm": 0.17352193593978882, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.234, + "step": 2468 + }, + { + "loss": 0.063, + "grad_norm": 1.5015274286270142, + "learning_rate": 7.665e-06, + "num_tokens": 840181.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2345, + "step": 2469 + }, + { + "loss": 0.0611, + "grad_norm": 1.3142430782318115, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2349999999999999, + "step": 2470 + }, + { + "loss": 0.0589, + "grad_norm": 1.3366830348968506, + "learning_rate": 7.655e-06, + "num_tokens": 841205.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2355, + "step": 2471 + }, + { + "loss": 0.0013, + "grad_norm": 0.17301248013973236, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.236, + "step": 2472 + }, + { + "loss": 0.0435, + "grad_norm": 1.1996126174926758, + "learning_rate": 7.645e-06, + "num_tokens": 841808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2365, + "step": 2473 + }, + { + "loss": 0.0015, + "grad_norm": 0.21387803554534912, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.237, + "step": 2474 + }, + { + "loss": 0.064, + "grad_norm": 1.3917018175125122, + "learning_rate": 7.635e-06, + "num_tokens": 842411.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2375, + "step": 2475 + }, + { + "loss": 0.0014, + "grad_norm": 0.20352397859096527, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.238, + "step": 2476 + }, + { + "loss": 0.0015, + "grad_norm": 0.21035854518413544, + "learning_rate": 7.625e-06, + "num_tokens": 842593.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2385, + "step": 2477 + }, + { + "loss": 0.0384, + "grad_norm": 1.1954495906829834, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.2389999999999999, + "step": 2478 + }, + { + "loss": 0.0398, + "grad_norm": 1.3171675205230713, + "learning_rate": 7.615e-06, + "num_tokens": 843617.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2395, + "step": 2479 + }, + { + "loss": 0.0016, + "grad_norm": 0.22742266952991486, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.24, + "step": 2480 + }, + { + "loss": 0.0505, + "grad_norm": 1.463847041130066, + "learning_rate": 7.605e-06, + "num_tokens": 844220.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2405, + "step": 2481 + }, + { + "loss": 0.0634, + "grad_norm": 1.0150220394134521, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.241, + "step": 2482 + }, + { + "loss": 0.0628, + "grad_norm": 1.2490217685699463, + "learning_rate": 7.595e-06, + "num_tokens": 845244.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2415, + "step": 2483 + }, + { + "loss": 0.0568, + "grad_norm": 0.9812212586402893, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.242, + "step": 2484 + }, + { + "loss": 0.0684, + "grad_norm": 1.4887269735336304, + "learning_rate": 7.585e-06, + "num_tokens": 846268.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2425, + "step": 2485 + }, + { + "loss": 0.002, + "grad_norm": 0.2907889485359192, + "learning_rate": 7.58e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2429999999999999, + "step": 2486 + }, + { + "loss": 0.0024, + "grad_norm": 0.3490116596221924, + "learning_rate": 7.575e-06, + "num_tokens": 846450.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2435, + "step": 2487 + }, + { + "loss": 0.0379, + "grad_norm": 0.9351921081542969, + "learning_rate": 7.57e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.244, + "step": 2488 + }, + { + "loss": 0.0409, + "grad_norm": 1.486227035522461, + "learning_rate": 7.565e-06, + "num_tokens": 847474.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2445, + "step": 2489 + }, + { + "loss": 0.0024, + "grad_norm": 0.35926783084869385, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.245, + "step": 2490 + }, + { + "loss": 0.0547, + "grad_norm": 1.216343879699707, + "learning_rate": 7.5550000000000005e-06, + "num_tokens": 848077.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2455, + "step": 2491 + }, + { + "loss": 0.0622, + "grad_norm": 1.0978708267211914, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.246, + "step": 2492 + }, + { + "loss": 0.0026, + "grad_norm": 0.3695952892303467, + "learning_rate": 7.545e-06, + "num_tokens": 848680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2465, + "step": 2493 + }, + { + "loss": 0.0712, + "grad_norm": 1.1717898845672607, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2469999999999999, + "step": 2494 + }, + { + "loss": 0.003, + "grad_norm": 0.4548373818397522, + "learning_rate": 7.535e-06, + "num_tokens": 849283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2475, + "step": 2495 + }, + { + "loss": 0.003, + "grad_norm": 0.4568769335746765, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.248, + "step": 2496 + }, + { + "loss": 0.0024, + "grad_norm": 0.36542901396751404, + "learning_rate": 7.525e-06, + "num_tokens": 849465.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2485, + "step": 2497 + }, + { + "loss": 0.0566, + "grad_norm": 1.315274715423584, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.249, + "step": 2498 + }, + { + "loss": 0.0026, + "grad_norm": 0.39514294266700745, + "learning_rate": 7.515e-06, + "num_tokens": 850068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2495, + "step": 2499 + }, + { + "loss": 0.0678, + "grad_norm": 1.530604362487793, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.25, + "step": 2500 + }, + { + "loss": 0.0022, + "grad_norm": 0.3104536533355713, + "learning_rate": 7.505e-06, + "num_tokens": 850671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2505, + "step": 2501 + }, + { + "loss": 0.0019, + "grad_norm": 0.2783941924571991, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 1.0, + "epoch": 1.251, + "step": 2502 + }, + { + "loss": 0.0597, + "grad_norm": 1.77070951461792, + "learning_rate": 7.495000000000001e-06, + "num_tokens": 851274.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2515, + "step": 2503 + }, + { + "loss": 0.0019, + "grad_norm": 0.2808924913406372, + "learning_rate": 7.49e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 1.0, + "epoch": 1.252, + "step": 2504 + }, + { + "loss": 0.0441, + "grad_norm": 1.070281982421875, + "learning_rate": 7.485000000000001e-06, + "num_tokens": 851877.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2525, + "step": 2505 + }, + { + "loss": 0.0018, + "grad_norm": 0.25118544697761536, + "learning_rate": 7.48e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2530000000000001, + "step": 2506 + }, + { + "loss": 0.0698, + "grad_norm": 1.3499447107315063, + "learning_rate": 7.475000000000001e-06, + "num_tokens": 852480.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2535, + "step": 2507 + }, + { + "loss": 0.0016, + "grad_norm": 0.23157145082950592, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 1.0, + "epoch": 1.254, + "step": 2508 + }, + { + "loss": 0.0384, + "grad_norm": 1.1759817600250244, + "learning_rate": 7.465000000000001e-06, + "num_tokens": 853083.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2545, + "step": 2509 + }, + { + "loss": 0.0017, + "grad_norm": 0.24023179709911346, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.255, + "step": 2510 + }, + { + "loss": 0.0559, + "grad_norm": 1.3075677156448364, + "learning_rate": 7.4550000000000015e-06, + "num_tokens": 853686.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2555, + "step": 2511 + }, + { + "loss": 0.0691, + "grad_norm": 1.5931618213653564, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.256, + "step": 2512 + }, + { + "loss": 0.0015, + "grad_norm": 0.21379417181015015, + "learning_rate": 7.445000000000001e-06, + "num_tokens": 854289.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2565, + "step": 2513 + }, + { + "loss": 0.0016, + "grad_norm": 0.22427783906459808, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 2514 + }, + { + "loss": 0.0585, + "grad_norm": 1.3955110311508179, + "learning_rate": 7.435000000000001e-06, + "num_tokens": 854892.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2575, + "step": 2515 + }, + { + "loss": 0.0016, + "grad_norm": 0.22540539503097534, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 1.0, + "epoch": 1.258, + "step": 2516 + }, + { + "loss": 0.0015, + "grad_norm": 0.20957466959953308, + "learning_rate": 7.425000000000001e-06, + "num_tokens": 855074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2585, + "step": 2517 + }, + { + "loss": 0.0013, + "grad_norm": 0.17798997461795807, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 2518 + }, + { + "loss": 0.0681, + "grad_norm": 1.692757487297058, + "learning_rate": 7.415000000000001e-06, + "num_tokens": 855677.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2595, + "step": 2519 + }, + { + "loss": 0.0013, + "grad_norm": 0.18327295780181885, + "learning_rate": 7.41e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 1.0, + "epoch": 1.26, + "step": 2520 + }, + { + "loss": 0.0694, + "grad_norm": 1.3426337242126465, + "learning_rate": 7.405000000000001e-06, + "num_tokens": 856280.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2605, + "step": 2521 + }, + { + "loss": 0.0575, + "grad_norm": 1.3755184412002563, + "learning_rate": 7.4e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 2522 + }, + { + "loss": 0.0012, + "grad_norm": 0.15550144016742706, + "learning_rate": 7.395000000000001e-06, + "num_tokens": 856883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2615, + "step": 2523 + }, + { + "loss": 0.0013, + "grad_norm": 0.18434429168701172, + "learning_rate": 7.39e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 2524 + }, + { + "loss": 0.0561, + "grad_norm": 1.3532037734985352, + "learning_rate": 7.385000000000001e-06, + "num_tokens": 857486.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2625, + "step": 2525 + }, + { + "loss": 0.0783, + "grad_norm": 2.749722719192505, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.263, + "step": 2526 + }, + { + "loss": 0.0739, + "grad_norm": 1.7389228343963623, + "learning_rate": 7.375000000000001e-06, + "num_tokens": 858510.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2635, + "step": 2527 + }, + { + "loss": 0.0596, + "grad_norm": 1.5434712171554565, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.264, + "step": 2528 + }, + { + "loss": 0.0012, + "grad_norm": 0.16660870611667633, + "learning_rate": 7.365000000000001e-06, + "num_tokens": 859113.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2645, + "step": 2529 + }, + { + "loss": 0.0466, + "grad_norm": 1.1618560552597046, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2650000000000001, + "step": 2530 + }, + { + "loss": 0.066, + "grad_norm": 1.4426238536834717, + "learning_rate": 7.355000000000001e-06, + "num_tokens": 860137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2655, + "step": 2531 + }, + { + "loss": 0.0014, + "grad_norm": 0.1874425858259201, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 1.0, + "epoch": 1.266, + "step": 2532 + }, + { + "loss": 0.0574, + "grad_norm": 1.2460824251174927, + "learning_rate": 7.345000000000001e-06, + "num_tokens": 860740.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2665, + "step": 2533 + }, + { + "loss": 0.0722, + "grad_norm": 1.7045679092407227, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.267, + "step": 2534 + }, + { + "loss": 0.0641, + "grad_norm": 1.4023394584655762, + "learning_rate": 7.335000000000001e-06, + "num_tokens": 861764.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2675, + "step": 2535 + }, + { + "loss": 0.0018, + "grad_norm": 0.25083932280540466, + "learning_rate": 7.33e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 1.0, + "epoch": 1.268, + "step": 2536 + }, + { + "loss": 0.0625, + "grad_norm": 1.2308841943740845, + "learning_rate": 7.325000000000001e-06, + "num_tokens": 862367.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2685, + "step": 2537 + }, + { + "loss": 0.1399, + "grad_norm": 2.6957058906555176, + "learning_rate": 7.32e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.2690000000000001, + "step": 2538 + }, + { + "loss": 0.0403, + "grad_norm": 1.0539931058883667, + "learning_rate": 7.315000000000001e-06, + "num_tokens": 863391.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2695, + "step": 2539 + }, + { + "loss": 0.0603, + "grad_norm": 1.6862679719924927, + "learning_rate": 7.31e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.27, + "step": 2540 + }, + { + "loss": 0.0022, + "grad_norm": 0.3110877275466919, + "learning_rate": 7.305000000000001e-06, + "num_tokens": 863994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2705, + "step": 2541 + }, + { + "loss": 0.0521, + "grad_norm": 1.1967720985412598, + "learning_rate": 7.3e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.271, + "step": 2542 + }, + { + "loss": 0.1383, + "grad_norm": 2.653751850128174, + "learning_rate": 7.295000000000001e-06, + "num_tokens": 865018.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.2715, + "step": 2543 + }, + { + "loss": 0.0025, + "grad_norm": 0.3700110614299774, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.272, + "step": 2544 + }, + { + "loss": 0.0031, + "grad_norm": 0.42906609177589417, + "learning_rate": 7.2850000000000006e-06, + "num_tokens": 865200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2725, + "step": 2545 + }, + { + "loss": 0.0437, + "grad_norm": 1.104537010192871, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.2730000000000001, + "step": 2546 + }, + { + "loss": 0.0027, + "grad_norm": 0.3919247090816498, + "learning_rate": 7.275000000000001e-06, + "num_tokens": 865803.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2735, + "step": 2547 + }, + { + "loss": 0.0029, + "grad_norm": 0.4317328929901123, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 2548 + }, + { + "loss": 0.0025, + "grad_norm": 0.37341031432151794, + "learning_rate": 7.265000000000001e-06, + "num_tokens": 865985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2745, + "step": 2549 + }, + { + "loss": 0.0416, + "grad_norm": 1.0737035274505615, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.275, + "step": 2550 + }, + { + "loss": 0.0646, + "grad_norm": 1.3107216358184814, + "learning_rate": 7.255000000000001e-06, + "num_tokens": 867009.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2755, + "step": 2551 + }, + { + "loss": 0.0381, + "grad_norm": 0.9233097434043884, + "learning_rate": 7.25e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.276, + "step": 2552 + }, + { + "loss": 0.056, + "grad_norm": 1.2655408382415771, + "learning_rate": 7.245000000000001e-06, + "num_tokens": 868033.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2765, + "step": 2553 + }, + { + "loss": 0.0519, + "grad_norm": 1.2633070945739746, + "learning_rate": 7.24e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2770000000000001, + "step": 2554 + }, + { + "loss": 0.0666, + "grad_norm": 1.5826315879821777, + "learning_rate": 7.235000000000001e-06, + "num_tokens": 869057.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2775, + "step": 2555 + }, + { + "loss": 0.0026, + "grad_norm": 0.3732459545135498, + "learning_rate": 7.23e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 1.0, + "epoch": 1.278, + "step": 2556 + }, + { + "loss": 0.0384, + "grad_norm": 0.9308870434761047, + "learning_rate": 7.225000000000001e-06, + "num_tokens": 869660.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.2785, + "step": 2557 + }, + { + "loss": 0.0027, + "grad_norm": 0.3898535668849945, + "learning_rate": 7.22e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 1.0, + "epoch": 1.279, + "step": 2558 + }, + { + "loss": 0.0416, + "grad_norm": 1.0320757627487183, + "learning_rate": 7.215000000000001e-06, + "num_tokens": 870263.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2795, + "step": 2559 + }, + { + "loss": 0.0028, + "grad_norm": 0.4121858477592468, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 1.0, + "epoch": 1.28, + "step": 2560 + }, + { + "loss": 0.0028, + "grad_norm": 0.4276776611804962, + "learning_rate": 7.2050000000000005e-06, + "num_tokens": 870445.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2805, + "step": 2561 + }, + { + "loss": 0.0407, + "grad_norm": 0.9345077872276306, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.2810000000000001, + "step": 2562 + }, + { + "loss": 0.0025, + "grad_norm": 0.3605985641479492, + "learning_rate": 7.1950000000000006e-06, + "num_tokens": 871048.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2814999999999999, + "step": 2563 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346655070781708, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 2564 + }, + { + "loss": 0.0744, + "grad_norm": 1.8985601663589478, + "learning_rate": 7.185000000000001e-06, + "num_tokens": 871651.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.2825, + "step": 2565 + }, + { + "loss": 0.0388, + "grad_norm": 0.96394282579422, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.283, + "step": 2566 + }, + { + "loss": 0.0682, + "grad_norm": 1.4056230783462524, + "learning_rate": 7.175000000000001e-06, + "num_tokens": 872675.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2835, + "step": 2567 + }, + { + "loss": 0.0022, + "grad_norm": 0.3106633722782135, + "learning_rate": 7.17e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 1.0, + "epoch": 1.284, + "step": 2568 + }, + { + "loss": 0.0384, + "grad_norm": 1.064553141593933, + "learning_rate": 7.165000000000001e-06, + "num_tokens": 873278.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.2845, + "step": 2569 + }, + { + "loss": 0.0626, + "grad_norm": 1.0392028093338013, + "learning_rate": 7.16e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.285, + "step": 2570 + }, + { + "loss": 0.0022, + "grad_norm": 0.30655112862586975, + "learning_rate": 7.155000000000001e-06, + "num_tokens": 873881.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2854999999999999, + "step": 2571 + }, + { + "loss": 0.0673, + "grad_norm": 1.5468289852142334, + "learning_rate": 7.15e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.286, + "step": 2572 + }, + { + "loss": 0.0498, + "grad_norm": 1.2830432653427124, + "learning_rate": 7.145000000000001e-06, + "num_tokens": 874905.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2865, + "step": 2573 + }, + { + "loss": 0.055, + "grad_norm": 1.0863239765167236, + "learning_rate": 7.14e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.287, + "step": 2574 + }, + { + "loss": 0.0606, + "grad_norm": 1.434999704360962, + "learning_rate": 7.135000000000001e-06, + "num_tokens": 875929.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.2875, + "step": 2575 + }, + { + "loss": 0.0532, + "grad_norm": 1.290963888168335, + "learning_rate": 7.13e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.288, + "step": 2576 + }, + { + "loss": 0.0026, + "grad_norm": 0.36665645241737366, + "learning_rate": 7.125e-06, + "num_tokens": 876532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2885, + "step": 2577 + }, + { + "loss": 0.0485, + "grad_norm": 1.2393323183059692, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.289, + "step": 2578 + }, + { + "loss": 0.0029, + "grad_norm": 0.3994691073894501, + "learning_rate": 7.1150000000000005e-06, + "num_tokens": 877135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2894999999999999, + "step": 2579 + }, + { + "loss": 0.0544, + "grad_norm": 1.361981987953186, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.29, + "step": 2580 + }, + { + "loss": 0.0529, + "grad_norm": 1.1892880201339722, + "learning_rate": 7.105000000000001e-06, + "num_tokens": 878159.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.2905, + "step": 2581 + }, + { + "loss": 0.069, + "grad_norm": 1.5022639036178589, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.291, + "step": 2582 + }, + { + "loss": 0.0594, + "grad_norm": 1.2174897193908691, + "learning_rate": 7.095000000000001e-06, + "num_tokens": 879183.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2915, + "step": 2583 + }, + { + "loss": 0.0723, + "grad_norm": 2.1814920902252197, + "learning_rate": 7.09e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.292, + "step": 2584 + }, + { + "loss": 0.0544, + "grad_norm": 1.1524139642715454, + "learning_rate": 7.085000000000001e-06, + "num_tokens": 880207.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.2925, + "step": 2585 + }, + { + "loss": 0.0035, + "grad_norm": 0.5082859396934509, + "learning_rate": 7.08e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.293, + "step": 2586 + }, + { + "loss": 0.0034, + "grad_norm": 0.49455657601356506, + "learning_rate": 7.075000000000001e-06, + "num_tokens": 880389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2934999999999999, + "step": 2587 + }, + { + "loss": 0.0516, + "grad_norm": 1.1291673183441162, + "learning_rate": 7.07e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.294, + "step": 2588 + }, + { + "loss": 0.0402, + "grad_norm": 1.073132038116455, + "learning_rate": 7.065000000000001e-06, + "num_tokens": 881413.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2945, + "step": 2589 + }, + { + "loss": 0.0409, + "grad_norm": 1.1712205410003662, + "learning_rate": 7.06e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.295, + "step": 2590 + }, + { + "loss": 0.0596, + "grad_norm": 1.2515616416931152, + "learning_rate": 7.055000000000001e-06, + "num_tokens": 882437.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2955, + "step": 2591 + }, + { + "loss": 0.0039, + "grad_norm": 0.5442217588424683, + "learning_rate": 7.05e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.296, + "step": 2592 + }, + { + "loss": 0.0041, + "grad_norm": 0.5982818603515625, + "learning_rate": 7.045e-06, + "num_tokens": 882619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2965, + "step": 2593 + }, + { + "loss": 0.0558, + "grad_norm": 1.3499200344085693, + "learning_rate": 7.04e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.297, + "step": 2594 + }, + { + "loss": 0.0038, + "grad_norm": 0.5531075596809387, + "learning_rate": 7.035e-06, + "num_tokens": 883222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2974999999999999, + "step": 2595 + }, + { + "loss": 0.0716, + "grad_norm": 1.8495835065841675, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.298, + "step": 2596 + }, + { + "loss": 0.0387, + "grad_norm": 1.2195173501968384, + "learning_rate": 7.0250000000000005e-06, + "num_tokens": 884246.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.2985, + "step": 2597 + }, + { + "loss": 0.0715, + "grad_norm": 1.7892330884933472, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.299, + "step": 2598 + }, + { + "loss": 0.0034, + "grad_norm": 0.5045487284660339, + "learning_rate": 7.015000000000001e-06, + "num_tokens": 884849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2995, + "step": 2599 + }, + { + "loss": 0.0551, + "grad_norm": 1.5834842920303345, + "learning_rate": 7.01e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3, + "step": 2600 + }, + { + "loss": 0.0037, + "grad_norm": 0.5456190705299377, + "learning_rate": 7.005000000000001e-06, + "num_tokens": 885452.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3005, + "step": 2601 + }, + { + "loss": 0.0036, + "grad_norm": 0.5648893117904663, + "learning_rate": 7e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.301, + "step": 2602 + }, + { + "loss": 0.06, + "grad_norm": 1.417505145072937, + "learning_rate": 6.995000000000001e-06, + "num_tokens": 886055.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3014999999999999, + "step": 2603 + }, + { + "loss": 0.0684, + "grad_norm": 1.5355315208435059, + "learning_rate": 6.99e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 2604 + }, + { + "loss": 0.0027, + "grad_norm": 0.4013388454914093, + "learning_rate": 6.985000000000001e-06, + "num_tokens": 886658.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3025, + "step": 2605 + }, + { + "loss": 0.0026, + "grad_norm": 0.38935649394989014, + "learning_rate": 6.98e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 1.0, + "epoch": 1.303, + "step": 2606 + }, + { + "loss": 0.0578, + "grad_norm": 1.1277109384536743, + "learning_rate": 6.975000000000001e-06, + "num_tokens": 887261.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3035, + "step": 2607 + }, + { + "loss": 0.0023, + "grad_norm": 0.3507567048072815, + "learning_rate": 6.97e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.304, + "step": 2608 + }, + { + "loss": 0.0021, + "grad_norm": 0.3047695755958557, + "learning_rate": 6.965e-06, + "num_tokens": 887443.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3045, + "step": 2609 + }, + { + "loss": 0.0564, + "grad_norm": 1.2580876350402832, + "learning_rate": 6.96e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.305, + "step": 2610 + }, + { + "loss": 0.0018, + "grad_norm": 0.26692500710487366, + "learning_rate": 6.955e-06, + "num_tokens": 888046.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3054999999999999, + "step": 2611 + }, + { + "loss": 0.0601, + "grad_norm": 1.2882280349731445, + "learning_rate": 6.95e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.306, + "step": 2612 + }, + { + "loss": 0.0662, + "grad_norm": 1.3626042604446411, + "learning_rate": 6.945e-06, + "num_tokens": 889070.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3065, + "step": 2613 + }, + { + "loss": 0.0015, + "grad_norm": 0.20663970708847046, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 1.0, + "epoch": 1.307, + "step": 2614 + }, + { + "loss": 0.0421, + "grad_norm": 1.0858242511749268, + "learning_rate": 6.9350000000000005e-06, + "num_tokens": 889673.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3075, + "step": 2615 + }, + { + "loss": 0.061, + "grad_norm": 1.1361438035964966, + "learning_rate": 6.93e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.308, + "step": 2616 + }, + { + "loss": 0.053, + "grad_norm": 1.0651867389678955, + "learning_rate": 6.925000000000001e-06, + "num_tokens": 890697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3085, + "step": 2617 + }, + { + "loss": 0.0648, + "grad_norm": 1.4413301944732666, + "learning_rate": 6.92e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.309, + "step": 2618 + }, + { + "loss": 0.0016, + "grad_norm": 0.23106220364570618, + "learning_rate": 6.915000000000001e-06, + "num_tokens": 891300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3094999999999999, + "step": 2619 + }, + { + "loss": 0.0596, + "grad_norm": 1.1959160566329956, + "learning_rate": 6.91e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.31, + "step": 2620 + }, + { + "loss": 0.0625, + "grad_norm": 1.4631091356277466, + "learning_rate": 6.905000000000001e-06, + "num_tokens": 892324.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3105, + "step": 2621 + }, + { + "loss": 0.0385, + "grad_norm": 1.1421785354614258, + "learning_rate": 6.9e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.311, + "step": 2622 + }, + { + "loss": 0.0644, + "grad_norm": 1.3361622095108032, + "learning_rate": 6.895000000000001e-06, + "num_tokens": 893348.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3115, + "step": 2623 + }, + { + "loss": 0.0393, + "grad_norm": 1.3101776838302612, + "learning_rate": 6.89e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.312, + "step": 2624 + }, + { + "loss": 0.0415, + "grad_norm": 1.2668944597244263, + "learning_rate": 6.885e-06, + "num_tokens": 894372.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3125, + "step": 2625 + }, + { + "loss": 0.0637, + "grad_norm": 1.8910597562789917, + "learning_rate": 6.88e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.313, + "step": 2626 + }, + { + "loss": 0.0385, + "grad_norm": 1.383195161819458, + "learning_rate": 6.875e-06, + "num_tokens": 895396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3135, + "step": 2627 + }, + { + "loss": 0.0029, + "grad_norm": 0.41114333271980286, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.314, + "step": 2628 + }, + { + "loss": 0.0709, + "grad_norm": 2.5799410343170166, + "learning_rate": 6.865e-06, + "num_tokens": 895999.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3145, + "step": 2629 + }, + { + "loss": 0.0717, + "grad_norm": 1.9481109380722046, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.315, + "step": 2630 + }, + { + "loss": 0.0031, + "grad_norm": 0.4399254620075226, + "learning_rate": 6.8550000000000004e-06, + "num_tokens": 896602.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3155000000000001, + "step": 2631 + }, + { + "loss": 0.0692, + "grad_norm": 1.7998204231262207, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.316, + "step": 2632 + }, + { + "loss": 0.0589, + "grad_norm": 1.2681806087493896, + "learning_rate": 6.8450000000000005e-06, + "num_tokens": 897626.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3165, + "step": 2633 + }, + { + "loss": 0.1572, + "grad_norm": 2.9861464500427246, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.317, + "step": 2634 + }, + { + "loss": 0.0033, + "grad_norm": 0.4804554879665375, + "learning_rate": 6.835000000000001e-06, + "num_tokens": 898229.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3175, + "step": 2635 + }, + { + "loss": 0.0039, + "grad_norm": 0.5298879742622375, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 1.0, + "epoch": 1.318, + "step": 2636 + }, + { + "loss": 0.0033, + "grad_norm": 0.45830750465393066, + "learning_rate": 6.825000000000001e-06, + "num_tokens": 898411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3185, + "step": 2637 + }, + { + "loss": 0.0759, + "grad_norm": 2.195838451385498, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.319, + "step": 2638 + }, + { + "loss": 0.0028, + "grad_norm": 0.3985951840877533, + "learning_rate": 6.815000000000001e-06, + "num_tokens": 899014.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3195000000000001, + "step": 2639 + }, + { + "loss": 0.0435, + "grad_norm": 1.082383155822754, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.32, + "step": 2640 + }, + { + "loss": 0.0031, + "grad_norm": 0.4386924207210541, + "learning_rate": 6.805000000000001e-06, + "num_tokens": 899617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3205, + "step": 2641 + }, + { + "loss": 0.044, + "grad_norm": 1.3280903100967407, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.321, + "step": 2642 + }, + { + "loss": 0.0024, + "grad_norm": 0.34161683917045593, + "learning_rate": 6.795e-06, + "num_tokens": 900220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3215, + "step": 2643 + }, + { + "loss": 0.0026, + "grad_norm": 0.3536019027233124, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.322, + "step": 2644 + }, + { + "loss": 0.0721, + "grad_norm": 1.825214147567749, + "learning_rate": 6.785e-06, + "num_tokens": 900823.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3225, + "step": 2645 + }, + { + "loss": 0.0603, + "grad_norm": 1.441401481628418, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.323, + "step": 2646 + }, + { + "loss": 0.0552, + "grad_norm": 1.026498556137085, + "learning_rate": 6.775e-06, + "num_tokens": 901847.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3235000000000001, + "step": 2647 + }, + { + "loss": 0.0607, + "grad_norm": 1.567400574684143, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 2648 + }, + { + "loss": 0.0365, + "grad_norm": 1.1754707098007202, + "learning_rate": 6.7650000000000005e-06, + "num_tokens": 902871.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3245, + "step": 2649 + }, + { + "loss": 0.0634, + "grad_norm": 1.0925911664962769, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.325, + "step": 2650 + }, + { + "loss": 0.0022, + "grad_norm": 0.3080379068851471, + "learning_rate": 6.7550000000000005e-06, + "num_tokens": 903474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3255, + "step": 2651 + }, + { + "loss": 0.0024, + "grad_norm": 0.3412145972251892, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 2652 + }, + { + "loss": 0.0612, + "grad_norm": 1.387506127357483, + "learning_rate": 6.745000000000001e-06, + "num_tokens": 904077.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3265, + "step": 2653 + }, + { + "loss": 0.0543, + "grad_norm": 1.0726388692855835, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.327, + "step": 2654 + }, + { + "loss": 0.0515, + "grad_norm": 1.3620095252990723, + "learning_rate": 6.735000000000001e-06, + "num_tokens": 905101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3275000000000001, + "step": 2655 + }, + { + "loss": 0.0536, + "grad_norm": 0.999693751335144, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.328, + "step": 2656 + }, + { + "loss": 0.0725, + "grad_norm": 1.338326096534729, + "learning_rate": 6.725000000000001e-06, + "num_tokens": 906125.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3285, + "step": 2657 + }, + { + "loss": 0.0025, + "grad_norm": 0.3621944487094879, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.329, + "step": 2658 + }, + { + "loss": 0.0027, + "grad_norm": 0.3732605576515198, + "learning_rate": 6.715e-06, + "num_tokens": 906307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3295, + "step": 2659 + }, + { + "loss": 0.0025, + "grad_norm": 0.3675785958766937, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 2660 + }, + { + "loss": 0.0546, + "grad_norm": 1.420166015625, + "learning_rate": 6.705e-06, + "num_tokens": 906910.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3305, + "step": 2661 + }, + { + "loss": 0.065, + "grad_norm": 1.7972251176834106, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.331, + "step": 2662 + }, + { + "loss": 0.0026, + "grad_norm": 0.38739708065986633, + "learning_rate": 6.695e-06, + "num_tokens": 907513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3315000000000001, + "step": 2663 + }, + { + "loss": 0.0621, + "grad_norm": 1.1773098707199097, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.332, + "step": 2664 + }, + { + "loss": 0.047, + "grad_norm": 1.3367711305618286, + "learning_rate": 6.685e-06, + "num_tokens": 908537.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3325, + "step": 2665 + }, + { + "loss": 0.0614, + "grad_norm": 1.5761219263076782, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.333, + "step": 2666 + }, + { + "loss": 0.0028, + "grad_norm": 0.39666748046875, + "learning_rate": 6.6750000000000005e-06, + "num_tokens": 909140.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3335, + "step": 2667 + }, + { + "loss": 0.0026, + "grad_norm": 0.38161027431488037, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 2668 + }, + { + "loss": 0.0027, + "grad_norm": 0.3782355785369873, + "learning_rate": 6.6650000000000006e-06, + "num_tokens": 909322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3345, + "step": 2669 + }, + { + "loss": 0.0449, + "grad_norm": 1.2690225839614868, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.335, + "step": 2670 + }, + { + "loss": 0.0618, + "grad_norm": 1.4404915571212769, + "learning_rate": 6.655000000000001e-06, + "num_tokens": 910346.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3355000000000001, + "step": 2671 + }, + { + "loss": 0.0593, + "grad_norm": 1.6381967067718506, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.336, + "step": 2672 + }, + { + "loss": 0.0023, + "grad_norm": 0.3195578455924988, + "learning_rate": 6.645000000000001e-06, + "num_tokens": 910949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3365, + "step": 2673 + }, + { + "loss": 0.1244, + "grad_norm": 2.2930221557617188, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.337, + "step": 2674 + }, + { + "loss": 0.061, + "grad_norm": 1.1066110134124756, + "learning_rate": 6.635e-06, + "num_tokens": 911973.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3375, + "step": 2675 + }, + { + "loss": 0.0023, + "grad_norm": 0.3287852704524994, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.338, + "step": 2676 + }, + { + "loss": 0.0723, + "grad_norm": 1.8842978477478027, + "learning_rate": 6.625e-06, + "num_tokens": 912576.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3385, + "step": 2677 + }, + { + "loss": 0.0616, + "grad_norm": 1.410254955291748, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.339, + "step": 2678 + }, + { + "loss": 0.0661, + "grad_norm": 1.7658559083938599, + "learning_rate": 6.615e-06, + "num_tokens": 913600.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3395000000000001, + "step": 2679 + }, + { + "loss": 0.0023, + "grad_norm": 0.3321514427661896, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.34, + "step": 2680 + }, + { + "loss": 0.0026, + "grad_norm": 0.38943803310394287, + "learning_rate": 6.605e-06, + "num_tokens": 913782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3405, + "step": 2681 + }, + { + "loss": 0.0533, + "grad_norm": 1.220119833946228, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.341, + "step": 2682 + }, + { + "loss": 0.0577, + "grad_norm": 1.4489399194717407, + "learning_rate": 6.595e-06, + "num_tokens": 914806.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3415, + "step": 2683 + }, + { + "loss": 0.0534, + "grad_norm": 1.437482237815857, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.342, + "step": 2684 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185518980026245, + "learning_rate": 6.5850000000000005e-06, + "num_tokens": 915409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3425, + "step": 2685 + }, + { + "loss": 0.0557, + "grad_norm": 1.233544945716858, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.343, + "step": 2686 + }, + { + "loss": 0.1326, + "grad_norm": 2.9976046085357666, + "learning_rate": 6.5750000000000006e-06, + "num_tokens": 916433.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.3435000000000001, + "step": 2687 + }, + { + "loss": 0.0555, + "grad_norm": 1.1236023902893066, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3439999999999999, + "step": 2688 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615230619907379, + "learning_rate": 6.565000000000001e-06, + "num_tokens": 917036.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3445, + "step": 2689 + }, + { + "loss": 0.0613, + "grad_norm": 1.391479730606079, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.345, + "step": 2690 + }, + { + "loss": 0.0023, + "grad_norm": 0.32829907536506653, + "learning_rate": 6.555e-06, + "num_tokens": 917639.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3455, + "step": 2691 + }, + { + "loss": 0.0025, + "grad_norm": 0.35658934712409973, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 2692 + }, + { + "loss": 0.0028, + "grad_norm": 0.40413787961006165, + "learning_rate": 6.545e-06, + "num_tokens": 917821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3465, + "step": 2693 + }, + { + "loss": 0.0023, + "grad_norm": 0.3243667185306549, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 2694 + }, + { + "loss": 0.0023, + "grad_norm": 0.33630460500717163, + "learning_rate": 6.535e-06, + "num_tokens": 918003.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3475, + "step": 2695 + }, + { + "loss": 0.0529, + "grad_norm": 1.6163023710250854, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3479999999999999, + "step": 2696 + }, + { + "loss": 0.0678, + "grad_norm": 1.5625479221343994, + "learning_rate": 6.525e-06, + "num_tokens": 919027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.3485, + "step": 2697 + }, + { + "loss": 0.0676, + "grad_norm": 1.5719348192214966, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.349, + "step": 2698 + }, + { + "loss": 0.002, + "grad_norm": 0.2859533727169037, + "learning_rate": 6.515e-06, + "num_tokens": 919630.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3495, + "step": 2699 + }, + { + "loss": 0.0434, + "grad_norm": 1.324418067932129, + "learning_rate": 6.51e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.35, + "step": 2700 + }, + { + "loss": 0.042, + "grad_norm": 1.3165403604507446, + "learning_rate": 6.505e-06, + "num_tokens": 920654.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3505, + "step": 2701 + }, + { + "loss": 0.0018, + "grad_norm": 0.2492700070142746, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.351, + "step": 2702 + }, + { + "loss": 0.1336, + "grad_norm": 2.710927963256836, + "learning_rate": 6.4950000000000005e-06, + "num_tokens": 921257.0, + "mean_token_accuracy": 0.9530332684516907, + "epoch": 1.3515, + "step": 2703 + }, + { + "loss": 0.059, + "grad_norm": 1.8472118377685547, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3519999999999999, + "step": 2704 + }, + { + "loss": 0.0448, + "grad_norm": 1.164633870124817, + "learning_rate": 6.485000000000001e-06, + "num_tokens": 922281.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3525, + "step": 2705 + }, + { + "loss": 0.0544, + "grad_norm": 1.3916175365447998, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.353, + "step": 2706 + }, + { + "loss": 0.0463, + "grad_norm": 1.397131085395813, + "learning_rate": 6.475e-06, + "num_tokens": 923305.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3535, + "step": 2707 + }, + { + "loss": 0.0019, + "grad_norm": 0.26947012543678284, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 1.0, + "epoch": 1.354, + "step": 2708 + }, + { + "loss": 0.0017, + "grad_norm": 0.23892365396022797, + "learning_rate": 6.465e-06, + "num_tokens": 923487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3545, + "step": 2709 + }, + { + "loss": 0.0018, + "grad_norm": 0.25066784024238586, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 2710 + }, + { + "loss": 0.0435, + "grad_norm": 1.2238185405731201, + "learning_rate": 6.455e-06, + "num_tokens": 924090.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3555, + "step": 2711 + }, + { + "loss": 0.0019, + "grad_norm": 0.26420801877975464, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3559999999999999, + "step": 2712 + }, + { + "loss": 0.0572, + "grad_norm": 1.1416776180267334, + "learning_rate": 6.445e-06, + "num_tokens": 924693.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3565, + "step": 2713 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754037082195282, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.357, + "step": 2714 + }, + { + "loss": 0.0018, + "grad_norm": 0.25344598293304443, + "learning_rate": 6.435e-06, + "num_tokens": 924875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3575, + "step": 2715 + }, + { + "loss": 0.0017, + "grad_norm": 0.23587873578071594, + "learning_rate": 6.43e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 2716 + }, + { + "loss": 0.0701, + "grad_norm": 1.6822742223739624, + "learning_rate": 6.425e-06, + "num_tokens": 925478.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3585, + "step": 2717 + }, + { + "loss": 0.0017, + "grad_norm": 0.22698912024497986, + "learning_rate": 6.42e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 1.0, + "epoch": 1.359, + "step": 2718 + }, + { + "loss": 0.044, + "grad_norm": 1.2083390951156616, + "learning_rate": 6.415e-06, + "num_tokens": 926081.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3595, + "step": 2719 + }, + { + "loss": 0.0017, + "grad_norm": 0.23327840864658356, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3599999999999999, + "step": 2720 + }, + { + "loss": 0.0557, + "grad_norm": 1.281182885169983, + "learning_rate": 6.4050000000000005e-06, + "num_tokens": 926684.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3605, + "step": 2721 + }, + { + "loss": 0.0539, + "grad_norm": 1.1743288040161133, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.361, + "step": 2722 + }, + { + "loss": 0.0646, + "grad_norm": 1.2470465898513794, + "learning_rate": 6.395e-06, + "num_tokens": 927708.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3615, + "step": 2723 + }, + { + "loss": 0.0015, + "grad_norm": 0.20256949961185455, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 1.0, + "epoch": 1.362, + "step": 2724 + }, + { + "loss": 0.0394, + "grad_norm": 1.1593482494354248, + "learning_rate": 6.385e-06, + "num_tokens": 928311.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3625, + "step": 2725 + }, + { + "loss": 0.0737, + "grad_norm": 1.937491774559021, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.363, + "step": 2726 + }, + { + "loss": 0.0438, + "grad_norm": 1.1960216760635376, + "learning_rate": 6.375e-06, + "num_tokens": 929335.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3635, + "step": 2727 + }, + { + "loss": 0.0016, + "grad_norm": 0.21763351559638977, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3639999999999999, + "step": 2728 + }, + { + "loss": 0.0017, + "grad_norm": 0.24479590356349945, + "learning_rate": 6.365e-06, + "num_tokens": 929517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3645, + "step": 2729 + }, + { + "loss": 0.0619, + "grad_norm": 1.315623164176941, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.365, + "step": 2730 + }, + { + "loss": 0.0016, + "grad_norm": 0.2220989614725113, + "learning_rate": 6.355e-06, + "num_tokens": 930120.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3655, + "step": 2731 + }, + { + "loss": 0.0017, + "grad_norm": 0.2321062982082367, + "learning_rate": 6.35e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 2732 + }, + { + "loss": 0.0017, + "grad_norm": 0.23798637092113495, + "learning_rate": 6.345e-06, + "num_tokens": 930302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3665, + "step": 2733 + }, + { + "loss": 0.0577, + "grad_norm": 1.2568942308425903, + "learning_rate": 6.34e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.367, + "step": 2734 + }, + { + "loss": 0.041, + "grad_norm": 1.6406105756759644, + "learning_rate": 6.335e-06, + "num_tokens": 931326.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3675, + "step": 2735 + }, + { + "loss": 0.0517, + "grad_norm": 1.235734224319458, + "learning_rate": 6.33e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3679999999999999, + "step": 2736 + }, + { + "loss": 0.0423, + "grad_norm": 0.9826679825782776, + "learning_rate": 6.3250000000000004e-06, + "num_tokens": 932350.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3685, + "step": 2737 + }, + { + "loss": 0.0018, + "grad_norm": 0.26410505175590515, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.369, + "step": 2738 + }, + { + "loss": 0.002, + "grad_norm": 0.2839818596839905, + "learning_rate": 6.315e-06, + "num_tokens": 932532.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3695, + "step": 2739 + }, + { + "loss": 0.0533, + "grad_norm": 1.2392011880874634, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.37, + "step": 2740 + }, + { + "loss": 0.0017, + "grad_norm": 0.23982419073581696, + "learning_rate": 6.305e-06, + "num_tokens": 933135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3705, + "step": 2741 + }, + { + "loss": 0.0548, + "grad_norm": 1.4777438640594482, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.371, + "step": 2742 + }, + { + "loss": 0.0019, + "grad_norm": 0.2724550664424896, + "learning_rate": 6.295e-06, + "num_tokens": 933738.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3715, + "step": 2743 + }, + { + "loss": 0.0019, + "grad_norm": 0.2623855173587799, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 2744 + }, + { + "loss": 0.0583, + "grad_norm": 1.0648019313812256, + "learning_rate": 6.285e-06, + "num_tokens": 934341.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3725, + "step": 2745 + }, + { + "loss": 0.0725, + "grad_norm": 1.589500069618225, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.373, + "step": 2746 + }, + { + "loss": 0.0617, + "grad_norm": 1.4101024866104126, + "learning_rate": 6.275e-06, + "num_tokens": 935365.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3735, + "step": 2747 + }, + { + "loss": 0.0019, + "grad_norm": 0.2686757743358612, + "learning_rate": 6.27e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 1.0, + "epoch": 1.374, + "step": 2748 + }, + { + "loss": 0.0451, + "grad_norm": 1.6723026037216187, + "learning_rate": 6.265e-06, + "num_tokens": 935968.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3745, + "step": 2749 + }, + { + "loss": 0.1481, + "grad_norm": 2.561096668243408, + "learning_rate": 6.26e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.375, + "step": 2750 + }, + { + "loss": 0.0593, + "grad_norm": 1.1495637893676758, + "learning_rate": 6.255e-06, + "num_tokens": 936992.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3755, + "step": 2751 + }, + { + "loss": 0.0583, + "grad_norm": 1.0880846977233887, + "learning_rate": 6.25e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.376, + "step": 2752 + }, + { + "loss": 0.0641, + "grad_norm": 1.4671814441680908, + "learning_rate": 6.245000000000001e-06, + "num_tokens": 938016.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.3765, + "step": 2753 + }, + { + "loss": 0.0022, + "grad_norm": 0.3182397186756134, + "learning_rate": 6.24e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 1.0, + "epoch": 1.377, + "step": 2754 + }, + { + "loss": 0.0605, + "grad_norm": 1.1844297647476196, + "learning_rate": 6.235000000000001e-06, + "num_tokens": 938619.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.3775, + "step": 2755 + }, + { + "loss": 0.0633, + "grad_norm": 1.227432131767273, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3780000000000001, + "step": 2756 + }, + { + "loss": 0.0026, + "grad_norm": 0.3716835677623749, + "learning_rate": 6.225000000000001e-06, + "num_tokens": 939222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3785, + "step": 2757 + }, + { + "loss": 0.0599, + "grad_norm": 1.3364546298980713, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.379, + "step": 2758 + }, + { + "loss": 0.0532, + "grad_norm": 1.3746514320373535, + "learning_rate": 6.215000000000001e-06, + "num_tokens": 940246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3795, + "step": 2759 + }, + { + "loss": 0.0696, + "grad_norm": 1.6494160890579224, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.38, + "step": 2760 + }, + { + "loss": 0.0031, + "grad_norm": 0.4407944083213806, + "learning_rate": 6.205000000000001e-06, + "num_tokens": 940849.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3805, + "step": 2761 + }, + { + "loss": 0.0559, + "grad_norm": 1.3899201154708862, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.381, + "step": 2762 + }, + { + "loss": 0.0393, + "grad_norm": 1.0294471979141235, + "learning_rate": 6.195000000000001e-06, + "num_tokens": 941873.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.3815, + "step": 2763 + }, + { + "loss": 0.0028, + "grad_norm": 0.41492387652397156, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3820000000000001, + "step": 2764 + }, + { + "loss": 0.039, + "grad_norm": 1.2755433320999146, + "learning_rate": 6.185000000000001e-06, + "num_tokens": 942476.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3825, + "step": 2765 + }, + { + "loss": 0.0407, + "grad_norm": 1.1641042232513428, + "learning_rate": 6.18e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 2766 + }, + { + "loss": 0.0033, + "grad_norm": 0.45876702666282654, + "learning_rate": 6.175000000000001e-06, + "num_tokens": 943079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3835, + "step": 2767 + }, + { + "loss": 0.053, + "grad_norm": 1.1277137994766235, + "learning_rate": 6.17e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.384, + "step": 2768 + }, + { + "loss": 0.069, + "grad_norm": 1.974735140800476, + "learning_rate": 6.165000000000001e-06, + "num_tokens": 944103.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3845, + "step": 2769 + }, + { + "loss": 0.0399, + "grad_norm": 1.308519959449768, + "learning_rate": 6.16e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.385, + "step": 2770 + }, + { + "loss": 0.0399, + "grad_norm": 1.3881995677947998, + "learning_rate": 6.155000000000001e-06, + "num_tokens": 945127.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3855, + "step": 2771 + }, + { + "loss": 0.0388, + "grad_norm": 1.376846194267273, + "learning_rate": 6.15e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3860000000000001, + "step": 2772 + }, + { + "loss": 0.0565, + "grad_norm": 1.6753615140914917, + "learning_rate": 6.145000000000001e-06, + "num_tokens": 946151.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.3865, + "step": 2773 + }, + { + "loss": 0.0537, + "grad_norm": 1.350510597229004, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.387, + "step": 2774 + }, + { + "loss": 0.0348, + "grad_norm": 1.0870490074157715, + "learning_rate": 6.1350000000000006e-06, + "num_tokens": 947175.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3875, + "step": 2775 + }, + { + "loss": 0.0041, + "grad_norm": 0.5800921320915222, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 1.0, + "epoch": 1.388, + "step": 2776 + }, + { + "loss": 0.0046, + "grad_norm": 0.6146813631057739, + "learning_rate": 6.125000000000001e-06, + "num_tokens": 947357.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3885, + "step": 2777 + }, + { + "loss": 0.0685, + "grad_norm": 2.028545618057251, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.389, + "step": 2778 + }, + { + "loss": 0.0562, + "grad_norm": 1.10191011428833, + "learning_rate": 6.115000000000001e-06, + "num_tokens": 948381.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3895, + "step": 2779 + }, + { + "loss": 0.057, + "grad_norm": 1.6782788038253784, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.3900000000000001, + "step": 2780 + }, + { + "loss": 0.0048, + "grad_norm": 0.6447672843933105, + "learning_rate": 6.105000000000001e-06, + "num_tokens": 948984.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3905, + "step": 2781 + }, + { + "loss": 0.0045, + "grad_norm": 0.6120741963386536, + "learning_rate": 6.1e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.391, + "step": 2782 + }, + { + "loss": 0.0037, + "grad_norm": 0.5294094085693359, + "learning_rate": 6.095000000000001e-06, + "num_tokens": 949166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3915, + "step": 2783 + }, + { + "loss": 0.0041, + "grad_norm": 0.5634744167327881, + "learning_rate": 6.09e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.392, + "step": 2784 + }, + { + "loss": 0.0543, + "grad_norm": 1.1946736574172974, + "learning_rate": 6.085000000000001e-06, + "num_tokens": 949769.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3925, + "step": 2785 + }, + { + "loss": 0.0393, + "grad_norm": 1.366204857826233, + "learning_rate": 6.08e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.393, + "step": 2786 + }, + { + "loss": 0.0031, + "grad_norm": 0.4588482677936554, + "learning_rate": 6.075000000000001e-06, + "num_tokens": 950372.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3935, + "step": 2787 + }, + { + "loss": 0.0741, + "grad_norm": 1.6554986238479614, + "learning_rate": 6.07e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.3940000000000001, + "step": 2788 + }, + { + "loss": 0.0358, + "grad_norm": 1.0052374601364136, + "learning_rate": 6.065000000000001e-06, + "num_tokens": 951396.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.3945, + "step": 2789 + }, + { + "loss": 0.0029, + "grad_norm": 0.4081237316131592, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 1.0, + "epoch": 1.395, + "step": 2790 + }, + { + "loss": 0.0627, + "grad_norm": 1.5037425756454468, + "learning_rate": 6.0550000000000005e-06, + "num_tokens": 951999.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.3955, + "step": 2791 + }, + { + "loss": 0.0024, + "grad_norm": 0.36483630537986755, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.396, + "step": 2792 + }, + { + "loss": 0.0455, + "grad_norm": 1.2050751447677612, + "learning_rate": 6.0450000000000006e-06, + "num_tokens": 952602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.3965, + "step": 2793 + }, + { + "loss": 0.0021, + "grad_norm": 0.3035581111907959, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.397, + "step": 2794 + }, + { + "loss": 0.0025, + "grad_norm": 0.3607647716999054, + "learning_rate": 6.035000000000001e-06, + "num_tokens": 952784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3975, + "step": 2795 + }, + { + "loss": 0.0625, + "grad_norm": 1.2081470489501953, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.3980000000000001, + "step": 2796 + }, + { + "loss": 0.0425, + "grad_norm": 1.0764844417572021, + "learning_rate": 6.025000000000001e-06, + "num_tokens": 953808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3985, + "step": 2797 + }, + { + "loss": 0.0632, + "grad_norm": 1.425076961517334, + "learning_rate": 6.02e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.399, + "step": 2798 + }, + { + "loss": 0.0395, + "grad_norm": 0.9470378160476685, + "learning_rate": 6.015000000000001e-06, + "num_tokens": 954832.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.3995, + "step": 2799 + }, + { + "loss": 0.0404, + "grad_norm": 1.0599867105484009, + "learning_rate": 6.01e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4, + "step": 2800 + }, + { + "loss": 0.0577, + "grad_norm": 1.2933481931686401, + "learning_rate": 6.005000000000001e-06, + "num_tokens": 955856.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4005, + "step": 2801 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215804398059845, + "learning_rate": 6e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 1.0, + "epoch": 1.401, + "step": 2802 + }, + { + "loss": 0.0601, + "grad_norm": 1.4103161096572876, + "learning_rate": 5.995000000000001e-06, + "num_tokens": 956459.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4015, + "step": 2803 + }, + { + "loss": 0.0022, + "grad_norm": 0.303093820810318, + "learning_rate": 5.99e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4020000000000001, + "step": 2804 + }, + { + "loss": 0.0663, + "grad_norm": 1.360801339149475, + "learning_rate": 5.985000000000001e-06, + "num_tokens": 957062.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4025, + "step": 2805 + }, + { + "loss": 0.0022, + "grad_norm": 0.3075718581676483, + "learning_rate": 5.98e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 1.0, + "epoch": 1.403, + "step": 2806 + }, + { + "loss": 0.0602, + "grad_norm": 1.137125849723816, + "learning_rate": 5.975e-06, + "num_tokens": 957665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4035, + "step": 2807 + }, + { + "loss": 0.0022, + "grad_norm": 0.30045661330223083, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.404, + "step": 2808 + }, + { + "loss": 0.0392, + "grad_norm": 1.0042834281921387, + "learning_rate": 5.9650000000000005e-06, + "num_tokens": 958268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4045, + "step": 2809 + }, + { + "loss": 0.0401, + "grad_norm": 1.117727279663086, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 2810 + }, + { + "loss": 0.0703, + "grad_norm": 1.4459725618362427, + "learning_rate": 5.955000000000001e-06, + "num_tokens": 959292.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4055, + "step": 2811 + }, + { + "loss": 0.0621, + "grad_norm": 1.3719003200531006, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4060000000000001, + "step": 2812 + }, + { + "loss": 0.0023, + "grad_norm": 0.31605690717697144, + "learning_rate": 5.945000000000001e-06, + "num_tokens": 959895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4064999999999999, + "step": 2813 + }, + { + "loss": 0.0605, + "grad_norm": 1.3043557405471802, + "learning_rate": 5.94e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.407, + "step": 2814 + }, + { + "loss": 0.0653, + "grad_norm": 1.2358129024505615, + "learning_rate": 5.935000000000001e-06, + "num_tokens": 960919.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4075, + "step": 2815 + }, + { + "loss": 0.0025, + "grad_norm": 0.3330060839653015, + "learning_rate": 5.93e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.408, + "step": 2816 + }, + { + "loss": 0.058, + "grad_norm": 1.1393845081329346, + "learning_rate": 5.925000000000001e-06, + "num_tokens": 961522.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4085, + "step": 2817 + }, + { + "loss": 0.0689, + "grad_norm": 1.4732993841171265, + "learning_rate": 5.92e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.409, + "step": 2818 + }, + { + "loss": 0.0028, + "grad_norm": 0.37631359696388245, + "learning_rate": 5.915000000000001e-06, + "num_tokens": 962125.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4095, + "step": 2819 + }, + { + "loss": 0.0026, + "grad_norm": 0.35936713218688965, + "learning_rate": 5.91e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 1.0, + "epoch": 1.41, + "step": 2820 + }, + { + "loss": 0.0558, + "grad_norm": 1.2061470746994019, + "learning_rate": 5.905000000000001e-06, + "num_tokens": 962728.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4104999999999999, + "step": 2821 + }, + { + "loss": 0.0582, + "grad_norm": 1.513380527496338, + "learning_rate": 5.9e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.411, + "step": 2822 + }, + { + "loss": 0.0418, + "grad_norm": 1.2391456365585327, + "learning_rate": 5.895e-06, + "num_tokens": 963752.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4115, + "step": 2823 + }, + { + "loss": 0.069, + "grad_norm": 1.4670116901397705, + "learning_rate": 5.89e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.412, + "step": 2824 + }, + { + "loss": 0.0028, + "grad_norm": 0.3788264989852905, + "learning_rate": 5.885e-06, + "num_tokens": 964355.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4125, + "step": 2825 + }, + { + "loss": 0.0027, + "grad_norm": 0.3687077462673187, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 1.0, + "epoch": 1.413, + "step": 2826 + }, + { + "loss": 0.0399, + "grad_norm": 1.233347773551941, + "learning_rate": 5.8750000000000005e-06, + "num_tokens": 964958.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4135, + "step": 2827 + }, + { + "loss": 0.0027, + "grad_norm": 0.37683984637260437, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 1.0, + "epoch": 1.414, + "step": 2828 + }, + { + "loss": 0.048, + "grad_norm": 1.2649948596954346, + "learning_rate": 5.865000000000001e-06, + "num_tokens": 965561.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4144999999999999, + "step": 2829 + }, + { + "loss": 0.0589, + "grad_norm": 1.3882242441177368, + "learning_rate": 5.86e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.415, + "step": 2830 + }, + { + "loss": 0.0362, + "grad_norm": 1.1658241748809814, + "learning_rate": 5.855000000000001e-06, + "num_tokens": 966585.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4155, + "step": 2831 + }, + { + "loss": 0.0521, + "grad_norm": 1.0679434537887573, + "learning_rate": 5.85e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.416, + "step": 2832 + }, + { + "loss": 0.003, + "grad_norm": 0.40383246541023254, + "learning_rate": 5.845000000000001e-06, + "num_tokens": 967188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4165, + "step": 2833 + }, + { + "loss": 0.0427, + "grad_norm": 1.2304917573928833, + "learning_rate": 5.84e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.417, + "step": 2834 + }, + { + "loss": 0.0538, + "grad_norm": 1.1524217128753662, + "learning_rate": 5.835000000000001e-06, + "num_tokens": 968212.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4175, + "step": 2835 + }, + { + "loss": 0.0379, + "grad_norm": 0.9404373168945312, + "learning_rate": 5.83e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.418, + "step": 2836 + }, + { + "loss": 0.0031, + "grad_norm": 0.4096873104572296, + "learning_rate": 5.825000000000001e-06, + "num_tokens": 968815.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4184999999999999, + "step": 2837 + }, + { + "loss": 0.0028, + "grad_norm": 0.37403908371925354, + "learning_rate": 5.82e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.419, + "step": 2838 + }, + { + "loss": 0.0361, + "grad_norm": 0.9613595604896545, + "learning_rate": 5.815e-06, + "num_tokens": 969418.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.4195, + "step": 2839 + }, + { + "loss": 0.0571, + "grad_norm": 1.3871361017227173, + "learning_rate": 5.81e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.42, + "step": 2840 + }, + { + "loss": 0.0365, + "grad_norm": 1.060208797454834, + "learning_rate": 5.805e-06, + "num_tokens": 970442.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4205, + "step": 2841 + }, + { + "loss": 0.0031, + "grad_norm": 0.4013337790966034, + "learning_rate": 5.8e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 1.0, + "epoch": 1.421, + "step": 2842 + }, + { + "loss": 0.041, + "grad_norm": 1.2097371816635132, + "learning_rate": 5.795e-06, + "num_tokens": 971045.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4215, + "step": 2843 + }, + { + "loss": 0.0614, + "grad_norm": 1.1929858922958374, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.422, + "step": 2844 + }, + { + "loss": 0.0559, + "grad_norm": 1.3881855010986328, + "learning_rate": 5.7850000000000005e-06, + "num_tokens": 972069.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4224999999999999, + "step": 2845 + }, + { + "loss": 0.0649, + "grad_norm": 1.5359828472137451, + "learning_rate": 5.78e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.423, + "step": 2846 + }, + { + "loss": 0.0562, + "grad_norm": 1.2387086153030396, + "learning_rate": 5.775000000000001e-06, + "num_tokens": 973093.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4235, + "step": 2847 + }, + { + "loss": 0.0634, + "grad_norm": 1.30796480178833, + "learning_rate": 5.77e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.424, + "step": 2848 + }, + { + "loss": 0.0035, + "grad_norm": 0.4502550959587097, + "learning_rate": 5.765000000000001e-06, + "num_tokens": 973696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4245, + "step": 2849 + }, + { + "loss": 0.0625, + "grad_norm": 1.4468958377838135, + "learning_rate": 5.76e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.425, + "step": 2850 + }, + { + "loss": 0.0675, + "grad_norm": 1.6001074314117432, + "learning_rate": 5.755000000000001e-06, + "num_tokens": 974720.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.4255, + "step": 2851 + }, + { + "loss": 0.0039, + "grad_norm": 0.5094487071037292, + "learning_rate": 5.75e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.426, + "step": 2852 + }, + { + "loss": 0.039, + "grad_norm": 0.9305217266082764, + "learning_rate": 5.745000000000001e-06, + "num_tokens": 975323.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4264999999999999, + "step": 2853 + }, + { + "loss": 0.0379, + "grad_norm": 0.9311109185218811, + "learning_rate": 5.74e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.427, + "step": 2854 + }, + { + "loss": 0.0656, + "grad_norm": 1.3803378343582153, + "learning_rate": 5.735e-06, + "num_tokens": 976347.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4275, + "step": 2855 + }, + { + "loss": 0.0495, + "grad_norm": 1.455142855644226, + "learning_rate": 5.73e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 2856 + }, + { + "loss": 0.048, + "grad_norm": 0.9757342338562012, + "learning_rate": 5.725e-06, + "num_tokens": 977371.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4285, + "step": 2857 + }, + { + "loss": 0.07, + "grad_norm": 1.3820722103118896, + "learning_rate": 5.72e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.429, + "step": 2858 + }, + { + "loss": 0.0496, + "grad_norm": 0.9005600810050964, + "learning_rate": 5.715e-06, + "num_tokens": 978395.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4295, + "step": 2859 + }, + { + "loss": 0.0588, + "grad_norm": 1.1311612129211426, + "learning_rate": 5.71e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.43, + "step": 2860 + }, + { + "loss": 0.0603, + "grad_norm": 1.2565733194351196, + "learning_rate": 5.7050000000000004e-06, + "num_tokens": 979419.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4304999999999999, + "step": 2861 + }, + { + "loss": 0.0061, + "grad_norm": 0.7569929361343384, + "learning_rate": 5.7e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 1.0, + "epoch": 1.431, + "step": 2862 + }, + { + "loss": 0.0061, + "grad_norm": 0.757468044757843, + "learning_rate": 5.6950000000000005e-06, + "num_tokens": 979601.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4315, + "step": 2863 + }, + { + "loss": 0.0442, + "grad_norm": 1.3257757425308228, + "learning_rate": 5.69e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.432, + "step": 2864 + }, + { + "loss": 0.0054, + "grad_norm": 0.7246440649032593, + "learning_rate": 5.685000000000001e-06, + "num_tokens": 980204.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4325, + "step": 2865 + }, + { + "loss": 0.0558, + "grad_norm": 1.1359434127807617, + "learning_rate": 5.68e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.433, + "step": 2866 + }, + { + "loss": 0.0059, + "grad_norm": 0.7417834997177124, + "learning_rate": 5.675000000000001e-06, + "num_tokens": 980807.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4335, + "step": 2867 + }, + { + "loss": 0.0046, + "grad_norm": 0.6065738201141357, + "learning_rate": 5.67e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 1.0, + "epoch": 1.434, + "step": 2868 + }, + { + "loss": 0.0045, + "grad_norm": 0.6112881898880005, + "learning_rate": 5.665000000000001e-06, + "num_tokens": 980989.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4344999999999999, + "step": 2869 + }, + { + "loss": 0.0598, + "grad_norm": 1.1446788311004639, + "learning_rate": 5.66e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.435, + "step": 2870 + }, + { + "loss": 0.004, + "grad_norm": 0.5359569787979126, + "learning_rate": 5.655e-06, + "num_tokens": 981592.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4355, + "step": 2871 + }, + { + "loss": 0.0372, + "grad_norm": 1.0225598812103271, + "learning_rate": 5.65e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.436, + "step": 2872 + }, + { + "loss": 0.0031, + "grad_norm": 0.4344872236251831, + "learning_rate": 5.645e-06, + "num_tokens": 982195.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4365, + "step": 2873 + }, + { + "loss": 0.0035, + "grad_norm": 0.4770989418029785, + "learning_rate": 5.64e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 1.0, + "epoch": 1.437, + "step": 2874 + }, + { + "loss": 0.1529, + "grad_norm": 2.6292223930358887, + "learning_rate": 5.635e-06, + "num_tokens": 982798.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4375, + "step": 2875 + }, + { + "loss": 0.0536, + "grad_norm": 1.1502479314804077, + "learning_rate": 5.63e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.438, + "step": 2876 + }, + { + "loss": 0.0541, + "grad_norm": 1.5837680101394653, + "learning_rate": 5.625e-06, + "num_tokens": 983822.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4385, + "step": 2877 + }, + { + "loss": 0.0621, + "grad_norm": 1.0932730436325073, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.439, + "step": 2878 + }, + { + "loss": 0.0024, + "grad_norm": 0.3176769018173218, + "learning_rate": 5.6150000000000005e-06, + "num_tokens": 984425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4395, + "step": 2879 + }, + { + "loss": 0.056, + "grad_norm": 1.2500354051589966, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.44, + "step": 2880 + }, + { + "loss": 0.046, + "grad_norm": 1.282015323638916, + "learning_rate": 5.6050000000000005e-06, + "num_tokens": 985449.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4405000000000001, + "step": 2881 + }, + { + "loss": 0.0672, + "grad_norm": 1.5532522201538086, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.441, + "step": 2882 + }, + { + "loss": 0.0571, + "grad_norm": 1.1880862712860107, + "learning_rate": 5.595000000000001e-06, + "num_tokens": 986473.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4415, + "step": 2883 + }, + { + "loss": 0.0019, + "grad_norm": 0.26678329706192017, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.442, + "step": 2884 + }, + { + "loss": 0.002, + "grad_norm": 0.26291605830192566, + "learning_rate": 5.585000000000001e-06, + "num_tokens": 986655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4425, + "step": 2885 + }, + { + "loss": 0.002, + "grad_norm": 0.2711234986782074, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 2886 + }, + { + "loss": 0.0021, + "grad_norm": 0.2862178087234497, + "learning_rate": 5.575000000000001e-06, + "num_tokens": 986837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4435, + "step": 2887 + }, + { + "loss": 0.0571, + "grad_norm": 1.3704899549484253, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.444, + "step": 2888 + }, + { + "loss": 0.0585, + "grad_norm": 1.0157582759857178, + "learning_rate": 5.565e-06, + "num_tokens": 987861.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4445000000000001, + "step": 2889 + }, + { + "loss": 0.0377, + "grad_norm": 1.079724669456482, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.445, + "step": 2890 + }, + { + "loss": 0.14, + "grad_norm": 1.9184038639068604, + "learning_rate": 5.555e-06, + "num_tokens": 988885.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.4455, + "step": 2891 + }, + { + "loss": 0.0019, + "grad_norm": 0.25762176513671875, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.446, + "step": 2892 + }, + { + "loss": 0.0702, + "grad_norm": 1.5166800022125244, + "learning_rate": 5.545e-06, + "num_tokens": 989488.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4465, + "step": 2893 + }, + { + "loss": 0.0394, + "grad_norm": 1.1091899871826172, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.447, + "step": 2894 + }, + { + "loss": 0.0647, + "grad_norm": 1.4911457300186157, + "learning_rate": 5.535e-06, + "num_tokens": 990512.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4475, + "step": 2895 + }, + { + "loss": 0.063, + "grad_norm": 1.6225489377975464, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.448, + "step": 2896 + }, + { + "loss": 0.041, + "grad_norm": 1.3053377866744995, + "learning_rate": 5.5250000000000005e-06, + "num_tokens": 991536.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4485000000000001, + "step": 2897 + }, + { + "loss": 0.002, + "grad_norm": 0.27576708793640137, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 1.0, + "epoch": 1.449, + "step": 2898 + }, + { + "loss": 0.0019, + "grad_norm": 0.26415082812309265, + "learning_rate": 5.5150000000000006e-06, + "num_tokens": 991718.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4495, + "step": 2899 + }, + { + "loss": 0.0021, + "grad_norm": 0.29174545407295227, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 2900 + }, + { + "loss": 0.0573, + "grad_norm": 1.38834810256958, + "learning_rate": 5.505000000000001e-06, + "num_tokens": 992321.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4505, + "step": 2901 + }, + { + "loss": 0.0443, + "grad_norm": 1.4421913623809814, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.451, + "step": 2902 + }, + { + "loss": 0.0022, + "grad_norm": 0.29639050364494324, + "learning_rate": 5.495000000000001e-06, + "num_tokens": 992924.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4515, + "step": 2903 + }, + { + "loss": 0.0655, + "grad_norm": 1.5755751132965088, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.452, + "step": 2904 + }, + { + "loss": 0.0022, + "grad_norm": 0.2955166697502136, + "learning_rate": 5.485e-06, + "num_tokens": 993527.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4525000000000001, + "step": 2905 + }, + { + "loss": 0.0021, + "grad_norm": 0.2841387689113617, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 2906 + }, + { + "loss": 0.0021, + "grad_norm": 0.286550909280777, + "learning_rate": 5.475e-06, + "num_tokens": 993709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4535, + "step": 2907 + }, + { + "loss": 0.0357, + "grad_norm": 1.0881201028823853, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.454, + "step": 2908 + }, + { + "loss": 0.0409, + "grad_norm": 1.0831390619277954, + "learning_rate": 5.465e-06, + "num_tokens": 994733.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4545, + "step": 2909 + }, + { + "loss": 0.0573, + "grad_norm": 1.2077234983444214, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.455, + "step": 2910 + }, + { + "loss": 0.0567, + "grad_norm": 1.2307626008987427, + "learning_rate": 5.455e-06, + "num_tokens": 995757.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4555, + "step": 2911 + }, + { + "loss": 0.067, + "grad_norm": 1.356170654296875, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.456, + "step": 2912 + }, + { + "loss": 0.0019, + "grad_norm": 0.2535565495491028, + "learning_rate": 5.445e-06, + "num_tokens": 996360.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4565000000000001, + "step": 2913 + }, + { + "loss": 0.0366, + "grad_norm": 1.0972084999084473, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.457, + "step": 2914 + }, + { + "loss": 0.054, + "grad_norm": 1.0509806871414185, + "learning_rate": 5.4350000000000005e-06, + "num_tokens": 997384.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4575, + "step": 2915 + }, + { + "loss": 0.0609, + "grad_norm": 1.3918635845184326, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.458, + "step": 2916 + }, + { + "loss": 0.0388, + "grad_norm": 1.0420371294021606, + "learning_rate": 5.4250000000000006e-06, + "num_tokens": 998408.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4585, + "step": 2917 + }, + { + "loss": 0.072, + "grad_norm": 1.3679769039154053, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.459, + "step": 2918 + }, + { + "loss": 0.0027, + "grad_norm": 0.3709925413131714, + "learning_rate": 5.415000000000001e-06, + "num_tokens": 999011.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4595, + "step": 2919 + }, + { + "loss": 0.0661, + "grad_norm": 1.381754755973816, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.46, + "step": 2920 + }, + { + "loss": 0.041, + "grad_norm": 1.2045968770980835, + "learning_rate": 5.405e-06, + "num_tokens": 1000035.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4605000000000001, + "step": 2921 + }, + { + "loss": 0.0023, + "grad_norm": 0.3062268793582916, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 1.0, + "epoch": 1.461, + "step": 2922 + }, + { + "loss": 0.0464, + "grad_norm": 1.0317680835723877, + "learning_rate": 5.395e-06, + "num_tokens": 1000638.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4615, + "step": 2923 + }, + { + "loss": 0.0495, + "grad_norm": 1.3268100023269653, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.462, + "step": 2924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6260963678359985, + "learning_rate": 5.385e-06, + "num_tokens": 1001662.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.4625, + "step": 2925 + }, + { + "loss": 0.0553, + "grad_norm": 1.0903215408325195, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.463, + "step": 2926 + }, + { + "loss": 0.0029, + "grad_norm": 0.3851076066493988, + "learning_rate": 5.375e-06, + "num_tokens": 1002265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4635, + "step": 2927 + }, + { + "loss": 0.0692, + "grad_norm": 1.6572927236557007, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.464, + "step": 2928 + }, + { + "loss": 0.0625, + "grad_norm": 1.5664637088775635, + "learning_rate": 5.365e-06, + "num_tokens": 1003289.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4645000000000001, + "step": 2929 + }, + { + "loss": 0.0626, + "grad_norm": 1.198908805847168, + "learning_rate": 5.36e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.465, + "step": 2930 + }, + { + "loss": 0.0641, + "grad_norm": 1.2499873638153076, + "learning_rate": 5.355e-06, + "num_tokens": 1004313.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4655, + "step": 2931 + }, + { + "loss": 0.0042, + "grad_norm": 0.5362296104431152, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 1.0, + "epoch": 1.466, + "step": 2932 + }, + { + "loss": 0.0037, + "grad_norm": 0.49612900614738464, + "learning_rate": 5.3450000000000005e-06, + "num_tokens": 1004495.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4665, + "step": 2933 + }, + { + "loss": 0.0039, + "grad_norm": 0.5115715861320496, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.467, + "step": 2934 + }, + { + "loss": 0.056, + "grad_norm": 1.3353906869888306, + "learning_rate": 5.335000000000001e-06, + "num_tokens": 1005098.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4675, + "step": 2935 + }, + { + "loss": 0.0407, + "grad_norm": 1.1807116270065308, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.468, + "step": 2936 + }, + { + "loss": 0.0551, + "grad_norm": 1.257308006286621, + "learning_rate": 5.325e-06, + "num_tokens": 1006122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4685000000000001, + "step": 2937 + }, + { + "loss": 0.0606, + "grad_norm": 1.2219009399414062, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 2938 + }, + { + "loss": 0.0403, + "grad_norm": 1.094189167022705, + "learning_rate": 5.315e-06, + "num_tokens": 1007146.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4695, + "step": 2939 + }, + { + "loss": 0.0467, + "grad_norm": 1.1191236972808838, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.47, + "step": 2940 + }, + { + "loss": 0.0556, + "grad_norm": 1.1905457973480225, + "learning_rate": 5.305e-06, + "num_tokens": 1008170.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4705, + "step": 2941 + }, + { + "loss": 0.0038, + "grad_norm": 0.5084776282310486, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 1.0, + "epoch": 1.471, + "step": 2942 + }, + { + "loss": 0.0558, + "grad_norm": 0.9725843071937561, + "learning_rate": 5.295e-06, + "num_tokens": 1008773.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4715, + "step": 2943 + }, + { + "loss": 0.058, + "grad_norm": 1.1404790878295898, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.472, + "step": 2944 + }, + { + "loss": 0.0038, + "grad_norm": 0.4927501380443573, + "learning_rate": 5.285e-06, + "num_tokens": 1009376.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4725, + "step": 2945 + }, + { + "loss": 0.052, + "grad_norm": 1.0383561849594116, + "learning_rate": 5.28e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.4729999999999999, + "step": 2946 + }, + { + "loss": 0.0039, + "grad_norm": 0.5245242118835449, + "learning_rate": 5.275e-06, + "num_tokens": 1009979.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4735, + "step": 2947 + }, + { + "loss": 0.0599, + "grad_norm": 1.137878179550171, + "learning_rate": 5.27e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.474, + "step": 2948 + }, + { + "loss": 0.0039, + "grad_norm": 0.5066397190093994, + "learning_rate": 5.265e-06, + "num_tokens": 1010582.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4745, + "step": 2949 + }, + { + "loss": 0.0037, + "grad_norm": 0.4922652542591095, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 1.0, + "epoch": 1.475, + "step": 2950 + }, + { + "loss": 0.0402, + "grad_norm": 1.1538424491882324, + "learning_rate": 5.2550000000000005e-06, + "num_tokens": 1011185.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4755, + "step": 2951 + }, + { + "loss": 0.0562, + "grad_norm": 1.8279345035552979, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.476, + "step": 2952 + }, + { + "loss": 0.0636, + "grad_norm": 1.2982397079467773, + "learning_rate": 5.245e-06, + "num_tokens": 1012209.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4765, + "step": 2953 + }, + { + "loss": 0.0033, + "grad_norm": 0.4363272488117218, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4769999999999999, + "step": 2954 + }, + { + "loss": 0.0549, + "grad_norm": 1.556806206703186, + "learning_rate": 5.235e-06, + "num_tokens": 1012812.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4775, + "step": 2955 + }, + { + "loss": 0.0358, + "grad_norm": 1.0845907926559448, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.478, + "step": 2956 + }, + { + "loss": 0.0032, + "grad_norm": 0.4301038384437561, + "learning_rate": 5.225e-06, + "num_tokens": 1013415.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4785, + "step": 2957 + }, + { + "loss": 0.003, + "grad_norm": 0.3937813341617584, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 1.0, + "epoch": 1.479, + "step": 2958 + }, + { + "loss": 0.0403, + "grad_norm": 0.9416876435279846, + "learning_rate": 5.215e-06, + "num_tokens": 1014018.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4795, + "step": 2959 + }, + { + "loss": 0.0029, + "grad_norm": 0.3991153836250305, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 1.0, + "epoch": 1.48, + "step": 2960 + }, + { + "loss": 0.0367, + "grad_norm": 1.106955885887146, + "learning_rate": 5.205e-06, + "num_tokens": 1014621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4805, + "step": 2961 + }, + { + "loss": 0.0586, + "grad_norm": 1.3418941497802734, + "learning_rate": 5.2e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.4809999999999999, + "step": 2962 + }, + { + "loss": 0.0358, + "grad_norm": 0.9489701390266418, + "learning_rate": 5.195e-06, + "num_tokens": 1015645.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4815, + "step": 2963 + }, + { + "loss": 0.0629, + "grad_norm": 1.0855809450149536, + "learning_rate": 5.19e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.482, + "step": 2964 + }, + { + "loss": 0.0027, + "grad_norm": 0.3812173306941986, + "learning_rate": 5.185e-06, + "num_tokens": 1016248.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4825, + "step": 2965 + }, + { + "loss": 0.0028, + "grad_norm": 0.3925476372241974, + "learning_rate": 5.18e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 1.0, + "epoch": 1.483, + "step": 2966 + }, + { + "loss": 0.0567, + "grad_norm": 1.3809915781021118, + "learning_rate": 5.1750000000000004e-06, + "num_tokens": 1016851.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4835, + "step": 2967 + }, + { + "loss": 0.0428, + "grad_norm": 1.4269046783447266, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.484, + "step": 2968 + }, + { + "loss": 0.0026, + "grad_norm": 0.3535688519477844, + "learning_rate": 5.165e-06, + "num_tokens": 1017454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4845, + "step": 2969 + }, + { + "loss": 0.0025, + "grad_norm": 0.34918057918548584, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 2970 + }, + { + "loss": 0.0025, + "grad_norm": 0.34093669056892395, + "learning_rate": 5.155e-06, + "num_tokens": 1017636.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4855, + "step": 2971 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282490372657776, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 2972 + }, + { + "loss": 0.0762, + "grad_norm": 2.083855628967285, + "learning_rate": 5.145e-06, + "num_tokens": 1018239.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.4865, + "step": 2973 + }, + { + "loss": 0.0548, + "grad_norm": 1.5333393812179565, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.487, + "step": 2974 + }, + { + "loss": 0.0373, + "grad_norm": 1.078650712966919, + "learning_rate": 5.135e-06, + "num_tokens": 1019263.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4875, + "step": 2975 + }, + { + "loss": 0.0447, + "grad_norm": 1.3176923990249634, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.488, + "step": 2976 + }, + { + "loss": 0.0023, + "grad_norm": 0.3142336308956146, + "learning_rate": 5.125e-06, + "num_tokens": 1019866.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4885, + "step": 2977 + }, + { + "loss": 0.0021, + "grad_norm": 0.2898966073989868, + "learning_rate": 5.12e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 2978 + }, + { + "loss": 0.046, + "grad_norm": 1.2612260580062866, + "learning_rate": 5.115e-06, + "num_tokens": 1020469.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4895, + "step": 2979 + }, + { + "loss": 0.0718, + "grad_norm": 2.1195919513702393, + "learning_rate": 5.11e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.49, + "step": 2980 + }, + { + "loss": 0.002, + "grad_norm": 0.2805778682231903, + "learning_rate": 5.105e-06, + "num_tokens": 1021072.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4905, + "step": 2981 + }, + { + "loss": 0.002, + "grad_norm": 0.2843017280101776, + "learning_rate": 5.1e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 2982 + }, + { + "loss": 0.002, + "grad_norm": 0.277892529964447, + "learning_rate": 5.095e-06, + "num_tokens": 1021254.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4915, + "step": 2983 + }, + { + "loss": 0.0422, + "grad_norm": 1.0654278993606567, + "learning_rate": 5.09e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.492, + "step": 2984 + }, + { + "loss": 0.0021, + "grad_norm": 0.29488760232925415, + "learning_rate": 5.085e-06, + "num_tokens": 1021857.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4925, + "step": 2985 + }, + { + "loss": 0.0392, + "grad_norm": 1.086630940437317, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.4929999999999999, + "step": 2986 + }, + { + "loss": 0.0018, + "grad_norm": 0.24030831456184387, + "learning_rate": 5.075e-06, + "num_tokens": 1022460.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4935, + "step": 2987 + }, + { + "loss": 0.0406, + "grad_norm": 0.9846900105476379, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.494, + "step": 2988 + }, + { + "loss": 0.0418, + "grad_norm": 1.6849744319915771, + "learning_rate": 5.065e-06, + "num_tokens": 1023484.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4945, + "step": 2989 + }, + { + "loss": 0.0015, + "grad_norm": 0.2105080932378769, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 1.0, + "epoch": 1.495, + "step": 2990 + }, + { + "loss": 0.0019, + "grad_norm": 0.26552438735961914, + "learning_rate": 5.055e-06, + "num_tokens": 1023666.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4955, + "step": 2991 + }, + { + "loss": 0.0016, + "grad_norm": 0.21752813458442688, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 2992 + }, + { + "loss": 0.0666, + "grad_norm": 1.4344254732131958, + "learning_rate": 5.045e-06, + "num_tokens": 1024269.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.4965, + "step": 2993 + }, + { + "loss": 0.0415, + "grad_norm": 1.1530293226242065, + "learning_rate": 5.04e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.4969999999999999, + "step": 2994 + }, + { + "loss": 0.0365, + "grad_norm": 1.0033750534057617, + "learning_rate": 5.035e-06, + "num_tokens": 1025293.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.4975, + "step": 2995 + }, + { + "loss": 0.0369, + "grad_norm": 1.062666654586792, + "learning_rate": 5.03e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.498, + "step": 2996 + }, + { + "loss": 0.0016, + "grad_norm": 0.23261243104934692, + "learning_rate": 5.025e-06, + "num_tokens": 1025896.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4985, + "step": 2997 + }, + { + "loss": 0.0019, + "grad_norm": 0.26436832547187805, + "learning_rate": 5.02e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 2998 + }, + { + "loss": 0.0395, + "grad_norm": 1.0828720331192017, + "learning_rate": 5.015e-06, + "num_tokens": 1026499.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.4995, + "step": 2999 + }, + { + "loss": 0.0018, + "grad_norm": 0.24229036271572113, + "learning_rate": 5.01e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5, + "step": 3000 + }, + { + "loss": 0.0636, + "grad_norm": 1.5817841291427612, + "learning_rate": 5.0049999999999995e-06, + "num_tokens": 1027102.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5005, + "step": 3001 + }, + { + "loss": 0.0016, + "grad_norm": 0.21737374365329742, + "learning_rate": 5e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.501, + "step": 3002 + }, + { + "loss": 0.0535, + "grad_norm": 1.0760457515716553, + "learning_rate": 4.9950000000000005e-06, + "num_tokens": 1027705.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5015, + "step": 3003 + }, + { + "loss": 0.0702, + "grad_norm": 1.5160242319107056, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.502, + "step": 3004 + }, + { + "loss": 0.002, + "grad_norm": 0.28444817662239075, + "learning_rate": 4.9850000000000006e-06, + "num_tokens": 1028308.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5025, + "step": 3005 + }, + { + "loss": 0.0659, + "grad_norm": 1.394598364830017, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5030000000000001, + "step": 3006 + }, + { + "loss": 0.0549, + "grad_norm": 1.4268598556518555, + "learning_rate": 4.975000000000001e-06, + "num_tokens": 1029332.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5034999999999998, + "step": 3007 + }, + { + "loss": 0.0693, + "grad_norm": 1.3022048473358154, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.504, + "step": 3008 + }, + { + "loss": 0.0577, + "grad_norm": 1.6034104824066162, + "learning_rate": 4.965000000000001e-06, + "num_tokens": 1030356.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5045, + "step": 3009 + }, + { + "loss": 0.002, + "grad_norm": 0.26663535833358765, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.505, + "step": 3010 + }, + { + "loss": 0.0021, + "grad_norm": 0.29342901706695557, + "learning_rate": 4.955e-06, + "num_tokens": 1030538.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5055, + "step": 3011 + }, + { + "loss": 0.0574, + "grad_norm": 1.232057809829712, + "learning_rate": 4.95e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.506, + "step": 3012 + }, + { + "loss": 0.0022, + "grad_norm": 0.2940972149372101, + "learning_rate": 4.945e-06, + "num_tokens": 1031141.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5065, + "step": 3013 + }, + { + "loss": 0.0022, + "grad_norm": 0.3054879307746887, + "learning_rate": 4.94e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 3014 + }, + { + "loss": 0.002, + "grad_norm": 0.2681850492954254, + "learning_rate": 4.935e-06, + "num_tokens": 1031323.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5074999999999998, + "step": 3015 + }, + { + "loss": 0.0018, + "grad_norm": 0.24893507361412048, + "learning_rate": 4.93e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 3016 + }, + { + "loss": 0.0514, + "grad_norm": 0.9832684993743896, + "learning_rate": 4.925e-06, + "num_tokens": 1031926.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5085, + "step": 3017 + }, + { + "loss": 0.0546, + "grad_norm": 1.0513758659362793, + "learning_rate": 4.92e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.509, + "step": 3018 + }, + { + "loss": 0.0438, + "grad_norm": 1.3256640434265137, + "learning_rate": 4.915e-06, + "num_tokens": 1032950.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5095, + "step": 3019 + }, + { + "loss": 0.039, + "grad_norm": 1.1269205808639526, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 3020 + }, + { + "loss": 0.0606, + "grad_norm": 1.2971444129943848, + "learning_rate": 4.9050000000000005e-06, + "num_tokens": 1033974.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5105, + "step": 3021 + }, + { + "loss": 0.0018, + "grad_norm": 0.24280324578285217, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5110000000000001, + "step": 3022 + }, + { + "loss": 0.0726, + "grad_norm": 1.984804630279541, + "learning_rate": 4.8950000000000006e-06, + "num_tokens": 1034577.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.5114999999999998, + "step": 3023 + }, + { + "loss": 0.0444, + "grad_norm": 1.1891791820526123, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.512, + "step": 3024 + }, + { + "loss": 0.0425, + "grad_norm": 1.3020859956741333, + "learning_rate": 4.885000000000001e-06, + "num_tokens": 1035601.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5125, + "step": 3025 + }, + { + "loss": 0.0397, + "grad_norm": 0.8992137312889099, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.513, + "step": 3026 + }, + { + "loss": 0.0518, + "grad_norm": 1.0060539245605469, + "learning_rate": 4.875e-06, + "num_tokens": 1036625.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5135, + "step": 3027 + }, + { + "loss": 0.0618, + "grad_norm": 1.2295892238616943, + "learning_rate": 4.87e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.514, + "step": 3028 + }, + { + "loss": 0.057, + "grad_norm": 1.2740446329116821, + "learning_rate": 4.865e-06, + "num_tokens": 1037649.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5145, + "step": 3029 + }, + { + "loss": 0.067, + "grad_norm": 1.2444658279418945, + "learning_rate": 4.86e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5150000000000001, + "step": 3030 + }, + { + "loss": 0.0389, + "grad_norm": 1.0539816617965698, + "learning_rate": 4.855e-06, + "num_tokens": 1038673.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5154999999999998, + "step": 3031 + }, + { + "loss": 0.0613, + "grad_norm": 1.2166608572006226, + "learning_rate": 4.85e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.516, + "step": 3032 + }, + { + "loss": 0.0636, + "grad_norm": 1.2355148792266846, + "learning_rate": 4.845e-06, + "num_tokens": 1039697.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5165, + "step": 3033 + }, + { + "loss": 0.0586, + "grad_norm": 1.195371150970459, + "learning_rate": 4.84e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.517, + "step": 3034 + }, + { + "loss": 0.0031, + "grad_norm": 0.4328796863555908, + "learning_rate": 4.835e-06, + "num_tokens": 1040300.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5175, + "step": 3035 + }, + { + "loss": 0.0033, + "grad_norm": 0.4462224841117859, + "learning_rate": 4.83e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 3036 + }, + { + "loss": 0.0404, + "grad_norm": 1.2766720056533813, + "learning_rate": 4.825e-06, + "num_tokens": 1040903.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5185, + "step": 3037 + }, + { + "loss": 0.0038, + "grad_norm": 0.5095945000648499, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5190000000000001, + "step": 3038 + }, + { + "loss": 0.0528, + "grad_norm": 1.006589651107788, + "learning_rate": 4.8150000000000005e-06, + "num_tokens": 1041506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5194999999999999, + "step": 3039 + }, + { + "loss": 0.0417, + "grad_norm": 1.2964030504226685, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.52, + "step": 3040 + }, + { + "loss": 0.0592, + "grad_norm": 1.1840168237686157, + "learning_rate": 4.805000000000001e-06, + "num_tokens": 1042530.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5205, + "step": 3041 + }, + { + "loss": 0.0038, + "grad_norm": 0.49861085414886475, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 1.0, + "epoch": 1.521, + "step": 3042 + }, + { + "loss": 0.0037, + "grad_norm": 0.49751704931259155, + "learning_rate": 4.795e-06, + "num_tokens": 1042712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5215, + "step": 3043 + }, + { + "loss": 0.0481, + "grad_norm": 1.022782564163208, + "learning_rate": 4.79e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.522, + "step": 3044 + }, + { + "loss": 0.0038, + "grad_norm": 0.49228596687316895, + "learning_rate": 4.785e-06, + "num_tokens": 1043315.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5225, + "step": 3045 + }, + { + "loss": 0.0376, + "grad_norm": 1.1729862689971924, + "learning_rate": 4.78e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5230000000000001, + "step": 3046 + }, + { + "loss": 0.0653, + "grad_norm": 1.5206072330474854, + "learning_rate": 4.775e-06, + "num_tokens": 1044339.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5234999999999999, + "step": 3047 + }, + { + "loss": 0.0633, + "grad_norm": 1.2756298780441284, + "learning_rate": 4.77e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.524, + "step": 3048 + }, + { + "loss": 0.0036, + "grad_norm": 0.4977829158306122, + "learning_rate": 4.765e-06, + "num_tokens": 1044942.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5245, + "step": 3049 + }, + { + "loss": 0.0526, + "grad_norm": 1.0627686977386475, + "learning_rate": 4.76e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.525, + "step": 3050 + }, + { + "loss": 0.0381, + "grad_norm": 1.1623107194900513, + "learning_rate": 4.755e-06, + "num_tokens": 1045966.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5255, + "step": 3051 + }, + { + "loss": 0.0036, + "grad_norm": 0.5119946002960205, + "learning_rate": 4.75e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.526, + "step": 3052 + }, + { + "loss": 0.0581, + "grad_norm": 1.3532719612121582, + "learning_rate": 4.745e-06, + "num_tokens": 1046569.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5265, + "step": 3053 + }, + { + "loss": 0.0594, + "grad_norm": 1.2599351406097412, + "learning_rate": 4.74e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5270000000000001, + "step": 3054 + }, + { + "loss": 0.0033, + "grad_norm": 0.4622514843940735, + "learning_rate": 4.735e-06, + "num_tokens": 1047172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5274999999999999, + "step": 3055 + }, + { + "loss": 0.0728, + "grad_norm": 1.6162607669830322, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.528, + "step": 3056 + }, + { + "loss": 0.0627, + "grad_norm": 1.4714545011520386, + "learning_rate": 4.7250000000000005e-06, + "num_tokens": 1048196.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5285, + "step": 3057 + }, + { + "loss": 0.0034, + "grad_norm": 0.48141252994537354, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 1.0, + "epoch": 1.529, + "step": 3058 + }, + { + "loss": 0.0385, + "grad_norm": 1.0676530599594116, + "learning_rate": 4.715e-06, + "num_tokens": 1048799.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5295, + "step": 3059 + }, + { + "loss": 0.0032, + "grad_norm": 0.44829145073890686, + "learning_rate": 4.71e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 1.0, + "epoch": 1.53, + "step": 3060 + }, + { + "loss": 0.0031, + "grad_norm": 0.4258093535900116, + "learning_rate": 4.705e-06, + "num_tokens": 1048981.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5305, + "step": 3061 + }, + { + "loss": 0.0715, + "grad_norm": 1.3509596586227417, + "learning_rate": 4.7e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.5310000000000001, + "step": 3062 + }, + { + "loss": 0.0341, + "grad_norm": 1.0876250267028809, + "learning_rate": 4.695e-06, + "num_tokens": 1050005.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5314999999999999, + "step": 3063 + }, + { + "loss": 0.0611, + "grad_norm": 1.3174924850463867, + "learning_rate": 4.69e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.532, + "step": 3064 + }, + { + "loss": 0.0417, + "grad_norm": 1.123489499092102, + "learning_rate": 4.685000000000001e-06, + "num_tokens": 1051029.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5325, + "step": 3065 + }, + { + "loss": 0.066, + "grad_norm": 1.7399777173995972, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.533, + "step": 3066 + }, + { + "loss": 0.0028, + "grad_norm": 0.38190290331840515, + "learning_rate": 4.675000000000001e-06, + "num_tokens": 1051632.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5335, + "step": 3067 + }, + { + "loss": 0.0651, + "grad_norm": 1.4947158098220825, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.534, + "step": 3068 + }, + { + "loss": 0.003, + "grad_norm": 0.40696173906326294, + "learning_rate": 4.665e-06, + "num_tokens": 1052235.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5345, + "step": 3069 + }, + { + "loss": 0.0555, + "grad_norm": 1.2926570177078247, + "learning_rate": 4.66e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5350000000000001, + "step": 3070 + }, + { + "loss": 0.0625, + "grad_norm": 1.2110244035720825, + "learning_rate": 4.655e-06, + "num_tokens": 1053259.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5354999999999999, + "step": 3071 + }, + { + "loss": 0.0033, + "grad_norm": 0.44495561718940735, + "learning_rate": 4.65e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 1.0, + "epoch": 1.536, + "step": 3072 + }, + { + "loss": 0.0574, + "grad_norm": 1.1019057035446167, + "learning_rate": 4.645e-06, + "num_tokens": 1053862.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.5365, + "step": 3073 + }, + { + "loss": 0.003, + "grad_norm": 0.4128797650337219, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 1.0, + "epoch": 1.537, + "step": 3074 + }, + { + "loss": 0.0572, + "grad_norm": 1.164238452911377, + "learning_rate": 4.6350000000000005e-06, + "num_tokens": 1054465.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5375, + "step": 3075 + }, + { + "loss": 0.0631, + "grad_norm": 1.4220542907714844, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.538, + "step": 3076 + }, + { + "loss": 0.0377, + "grad_norm": 1.2259591817855835, + "learning_rate": 4.625000000000001e-06, + "num_tokens": 1055489.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5385, + "step": 3077 + }, + { + "loss": 0.003, + "grad_norm": 0.4099157154560089, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5390000000000001, + "step": 3078 + }, + { + "loss": 0.0027, + "grad_norm": 0.3750811219215393, + "learning_rate": 4.615000000000001e-06, + "num_tokens": 1055671.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5394999999999999, + "step": 3079 + }, + { + "loss": 0.0621, + "grad_norm": 1.2325596809387207, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.54, + "step": 3080 + }, + { + "loss": 0.0504, + "grad_norm": 0.9959844350814819, + "learning_rate": 4.605000000000001e-06, + "num_tokens": 1056695.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5405, + "step": 3081 + }, + { + "loss": 0.0574, + "grad_norm": 1.0301742553710938, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.541, + "step": 3082 + }, + { + "loss": 0.0512, + "grad_norm": 1.0320547819137573, + "learning_rate": 4.595000000000001e-06, + "num_tokens": 1057719.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5415, + "step": 3083 + }, + { + "loss": 0.0561, + "grad_norm": 1.225005865097046, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.542, + "step": 3084 + }, + { + "loss": 0.0376, + "grad_norm": 1.1090381145477295, + "learning_rate": 4.585e-06, + "num_tokens": 1058743.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.5425, + "step": 3085 + }, + { + "loss": 0.0032, + "grad_norm": 0.44738513231277466, + "learning_rate": 4.58e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5430000000000001, + "step": 3086 + }, + { + "loss": 0.0031, + "grad_norm": 0.4485037624835968, + "learning_rate": 4.575e-06, + "num_tokens": 1058925.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5434999999999999, + "step": 3087 + }, + { + "loss": 0.0703, + "grad_norm": 1.630645751953125, + "learning_rate": 4.57e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.544, + "step": 3088 + }, + { + "loss": 0.0034, + "grad_norm": 0.4586680233478546, + "learning_rate": 4.565e-06, + "num_tokens": 1059528.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5445, + "step": 3089 + }, + { + "loss": 0.003, + "grad_norm": 0.41872572898864746, + "learning_rate": 4.56e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 1.0, + "epoch": 1.545, + "step": 3090 + }, + { + "loss": 0.0433, + "grad_norm": 1.1152652502059937, + "learning_rate": 4.5550000000000004e-06, + "num_tokens": 1060131.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5455, + "step": 3091 + }, + { + "loss": 0.0025, + "grad_norm": 0.35068032145500183, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.546, + "step": 3092 + }, + { + "loss": 0.0396, + "grad_norm": 1.0990018844604492, + "learning_rate": 4.5450000000000005e-06, + "num_tokens": 1060734.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5465, + "step": 3093 + }, + { + "loss": 0.0635, + "grad_norm": 1.6193867921829224, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 3094 + }, + { + "loss": 0.0027, + "grad_norm": 0.3813343644142151, + "learning_rate": 4.535000000000001e-06, + "num_tokens": 1061337.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5474999999999999, + "step": 3095 + }, + { + "loss": 0.0025, + "grad_norm": 0.3389427363872528, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 1.0, + "epoch": 1.548, + "step": 3096 + }, + { + "loss": 0.0652, + "grad_norm": 1.455460786819458, + "learning_rate": 4.525000000000001e-06, + "num_tokens": 1061940.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5485, + "step": 3097 + }, + { + "loss": 0.0596, + "grad_norm": 1.318932056427002, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.549, + "step": 3098 + }, + { + "loss": 0.0021, + "grad_norm": 0.30851492285728455, + "learning_rate": 4.515000000000001e-06, + "num_tokens": 1062543.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5495, + "step": 3099 + }, + { + "loss": 0.0021, + "grad_norm": 0.29576948285102844, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 1.0, + "epoch": 1.55, + "step": 3100 + }, + { + "loss": 0.0021, + "grad_norm": 0.29117029905319214, + "learning_rate": 4.505e-06, + "num_tokens": 1062725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5505, + "step": 3101 + }, + { + "loss": 0.04, + "grad_norm": 1.1777619123458862, + "learning_rate": 4.5e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5510000000000002, + "step": 3102 + }, + { + "loss": 0.0538, + "grad_norm": 1.1641870737075806, + "learning_rate": 4.495e-06, + "num_tokens": 1063749.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5514999999999999, + "step": 3103 + }, + { + "loss": 0.0423, + "grad_norm": 1.3220707178115845, + "learning_rate": 4.49e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.552, + "step": 3104 + }, + { + "loss": 0.0021, + "grad_norm": 0.30619239807128906, + "learning_rate": 4.485e-06, + "num_tokens": 1064352.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5525, + "step": 3105 + }, + { + "loss": 0.0681, + "grad_norm": 1.3809969425201416, + "learning_rate": 4.48e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.553, + "step": 3106 + }, + { + "loss": 0.055, + "grad_norm": 1.1956359148025513, + "learning_rate": 4.475e-06, + "num_tokens": 1065376.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5535, + "step": 3107 + }, + { + "loss": 0.0573, + "grad_norm": 1.2887022495269775, + "learning_rate": 4.47e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 3108 + }, + { + "loss": 0.0554, + "grad_norm": 1.1560310125350952, + "learning_rate": 4.4650000000000004e-06, + "num_tokens": 1066400.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5545, + "step": 3109 + }, + { + "loss": 0.0021, + "grad_norm": 0.29395192861557007, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5550000000000002, + "step": 3110 + }, + { + "loss": 0.0652, + "grad_norm": 1.608464002609253, + "learning_rate": 4.4550000000000005e-06, + "num_tokens": 1067003.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5554999999999999, + "step": 3111 + }, + { + "loss": 0.0558, + "grad_norm": 1.2650138139724731, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.556, + "step": 3112 + }, + { + "loss": 0.0458, + "grad_norm": 1.2872962951660156, + "learning_rate": 4.445000000000001e-06, + "num_tokens": 1068027.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5565, + "step": 3113 + }, + { + "loss": 0.0022, + "grad_norm": 0.30732589960098267, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.557, + "step": 3114 + }, + { + "loss": 0.0558, + "grad_norm": 1.0926036834716797, + "learning_rate": 4.435000000000001e-06, + "num_tokens": 1068630.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5575, + "step": 3115 + }, + { + "loss": 0.0023, + "grad_norm": 0.32145828008651733, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 1.0, + "epoch": 1.558, + "step": 3116 + }, + { + "loss": 0.0373, + "grad_norm": 1.1655807495117188, + "learning_rate": 4.425e-06, + "num_tokens": 1069233.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5585, + "step": 3117 + }, + { + "loss": 0.0769, + "grad_norm": 1.796105980873108, + "learning_rate": 4.42e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.5590000000000002, + "step": 3118 + }, + { + "loss": 0.0026, + "grad_norm": 0.3620903789997101, + "learning_rate": 4.415e-06, + "num_tokens": 1069836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5594999999999999, + "step": 3119 + }, + { + "loss": 0.0429, + "grad_norm": 1.309659481048584, + "learning_rate": 4.41e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.56, + "step": 3120 + }, + { + "loss": 0.0023, + "grad_norm": 0.32819899916648865, + "learning_rate": 4.405e-06, + "num_tokens": 1070439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5605, + "step": 3121 + }, + { + "loss": 0.0576, + "grad_norm": 1.0110256671905518, + "learning_rate": 4.4e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.561, + "step": 3122 + }, + { + "loss": 0.0474, + "grad_norm": 1.327854037284851, + "learning_rate": 4.395e-06, + "num_tokens": 1071463.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5615, + "step": 3123 + }, + { + "loss": 0.0371, + "grad_norm": 1.2000775337219238, + "learning_rate": 4.39e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.562, + "step": 3124 + }, + { + "loss": 0.0532, + "grad_norm": 1.1874752044677734, + "learning_rate": 4.385e-06, + "num_tokens": 1072487.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5625, + "step": 3125 + }, + { + "loss": 0.0387, + "grad_norm": 1.2780605554580688, + "learning_rate": 4.38e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.563, + "step": 3126 + }, + { + "loss": 0.0029, + "grad_norm": 0.38496679067611694, + "learning_rate": 4.3750000000000005e-06, + "num_tokens": 1073090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5635, + "step": 3127 + }, + { + "loss": 0.0028, + "grad_norm": 0.3800834119319916, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 3128 + }, + { + "loss": 0.0386, + "grad_norm": 1.077006459236145, + "learning_rate": 4.3650000000000006e-06, + "num_tokens": 1073693.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5645, + "step": 3129 + }, + { + "loss": 0.0669, + "grad_norm": 1.2879207134246826, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.565, + "step": 3130 + }, + { + "loss": 0.0027, + "grad_norm": 0.37664031982421875, + "learning_rate": 4.355000000000001e-06, + "num_tokens": 1074296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5655000000000001, + "step": 3131 + }, + { + "loss": 0.0026, + "grad_norm": 0.35762181878089905, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 3132 + }, + { + "loss": 0.0026, + "grad_norm": 0.3616492450237274, + "learning_rate": 4.345000000000001e-06, + "num_tokens": 1074478.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5665, + "step": 3133 + }, + { + "loss": 0.054, + "grad_norm": 1.413800835609436, + "learning_rate": 4.34e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.567, + "step": 3134 + }, + { + "loss": 0.0549, + "grad_norm": 1.1791685819625854, + "learning_rate": 4.335e-06, + "num_tokens": 1075502.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5675, + "step": 3135 + }, + { + "loss": 0.0382, + "grad_norm": 1.1417726278305054, + "learning_rate": 4.33e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.568, + "step": 3136 + }, + { + "loss": 0.0586, + "grad_norm": 1.360926866531372, + "learning_rate": 4.325e-06, + "num_tokens": 1076526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5685, + "step": 3137 + }, + { + "loss": 0.0569, + "grad_norm": 1.1636319160461426, + "learning_rate": 4.32e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.569, + "step": 3138 + }, + { + "loss": 0.0024, + "grad_norm": 0.3462548851966858, + "learning_rate": 4.315e-06, + "num_tokens": 1077129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5695000000000001, + "step": 3139 + }, + { + "loss": 0.0619, + "grad_norm": 1.3171995878219604, + "learning_rate": 4.31e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5699999999999998, + "step": 3140 + }, + { + "loss": 0.0026, + "grad_norm": 0.35494717955589294, + "learning_rate": 4.305e-06, + "num_tokens": 1077732.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5705, + "step": 3141 + }, + { + "loss": 0.003, + "grad_norm": 0.4175266921520233, + "learning_rate": 4.3e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 3142 + }, + { + "loss": 0.0588, + "grad_norm": 1.5107394456863403, + "learning_rate": 4.295e-06, + "num_tokens": 1078335.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5715, + "step": 3143 + }, + { + "loss": 0.0583, + "grad_norm": 1.5851935148239136, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.572, + "step": 3144 + }, + { + "loss": 0.0401, + "grad_norm": 1.1422215700149536, + "learning_rate": 4.2850000000000005e-06, + "num_tokens": 1079359.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5725, + "step": 3145 + }, + { + "loss": 0.0429, + "grad_norm": 1.3809804916381836, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.573, + "step": 3146 + }, + { + "loss": 0.0397, + "grad_norm": 1.1466025114059448, + "learning_rate": 4.2750000000000006e-06, + "num_tokens": 1080383.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5735000000000001, + "step": 3147 + }, + { + "loss": 0.0389, + "grad_norm": 1.035447120666504, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5739999999999998, + "step": 3148 + }, + { + "loss": 0.0029, + "grad_norm": 0.39080947637557983, + "learning_rate": 4.265000000000001e-06, + "num_tokens": 1080986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5745, + "step": 3149 + }, + { + "loss": 0.0029, + "grad_norm": 0.39702585339546204, + "learning_rate": 4.26e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 3150 + }, + { + "loss": 0.0376, + "grad_norm": 1.0406111478805542, + "learning_rate": 4.255e-06, + "num_tokens": 1081589.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5755, + "step": 3151 + }, + { + "loss": 0.0029, + "grad_norm": 0.40471911430358887, + "learning_rate": 4.25e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 1.0, + "epoch": 1.576, + "step": 3152 + }, + { + "loss": 0.0542, + "grad_norm": 1.382663607597351, + "learning_rate": 4.245e-06, + "num_tokens": 1082192.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5765, + "step": 3153 + }, + { + "loss": 0.0026, + "grad_norm": 0.39454102516174316, + "learning_rate": 4.24e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.577, + "step": 3154 + }, + { + "loss": 0.0515, + "grad_norm": 1.1649845838546753, + "learning_rate": 4.235e-06, + "num_tokens": 1082795.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5775000000000001, + "step": 3155 + }, + { + "loss": 0.0383, + "grad_norm": 1.10068941116333, + "learning_rate": 4.23e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5779999999999998, + "step": 3156 + }, + { + "loss": 0.0417, + "grad_norm": 1.2253996133804321, + "learning_rate": 4.225e-06, + "num_tokens": 1083819.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5785, + "step": 3157 + }, + { + "loss": 0.0028, + "grad_norm": 0.3961932361125946, + "learning_rate": 4.22e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 1.0, + "epoch": 1.579, + "step": 3158 + }, + { + "loss": 0.0503, + "grad_norm": 1.089829921722412, + "learning_rate": 4.215e-06, + "num_tokens": 1084422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5795, + "step": 3159 + }, + { + "loss": 0.0026, + "grad_norm": 0.3804922103881836, + "learning_rate": 4.21e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.58, + "step": 3160 + }, + { + "loss": 0.0551, + "grad_norm": 1.131371259689331, + "learning_rate": 4.205e-06, + "num_tokens": 1085025.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.5805, + "step": 3161 + }, + { + "loss": 0.0707, + "grad_norm": 1.5008512735366821, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.581, + "step": 3162 + }, + { + "loss": 0.1371, + "grad_norm": 2.452535629272461, + "learning_rate": 4.1950000000000005e-06, + "num_tokens": 1086049.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.5815000000000001, + "step": 3163 + }, + { + "loss": 0.0375, + "grad_norm": 1.132121205329895, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5819999999999999, + "step": 3164 + }, + { + "loss": 0.0372, + "grad_norm": 1.136691689491272, + "learning_rate": 4.185000000000001e-06, + "num_tokens": 1087073.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.5825, + "step": 3165 + }, + { + "loss": 0.066, + "grad_norm": 1.451141595840454, + "learning_rate": 4.18e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.583, + "step": 3166 + }, + { + "loss": 0.0601, + "grad_norm": 1.3219071626663208, + "learning_rate": 4.175e-06, + "num_tokens": 1088097.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5835, + "step": 3167 + }, + { + "loss": 0.0033, + "grad_norm": 0.44295263290405273, + "learning_rate": 4.17e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.584, + "step": 3168 + }, + { + "loss": 0.0033, + "grad_norm": 0.4387746751308441, + "learning_rate": 4.165e-06, + "num_tokens": 1088279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5845, + "step": 3169 + }, + { + "loss": 0.0031, + "grad_norm": 0.42495018243789673, + "learning_rate": 4.16e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 3170 + }, + { + "loss": 0.0032, + "grad_norm": 0.43195274472236633, + "learning_rate": 4.155e-06, + "num_tokens": 1088461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5855000000000001, + "step": 3171 + }, + { + "loss": 0.0383, + "grad_norm": 1.089600682258606, + "learning_rate": 4.15e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5859999999999999, + "step": 3172 + }, + { + "loss": 0.037, + "grad_norm": 1.125685691833496, + "learning_rate": 4.145e-06, + "num_tokens": 1089485.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.5865, + "step": 3173 + }, + { + "loss": 0.0028, + "grad_norm": 0.3951958119869232, + "learning_rate": 4.14e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 1.0, + "epoch": 1.587, + "step": 3174 + }, + { + "loss": 0.0032, + "grad_norm": 0.4249975085258484, + "learning_rate": 4.135e-06, + "num_tokens": 1089667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5875, + "step": 3175 + }, + { + "loss": 0.003, + "grad_norm": 0.4017711281776428, + "learning_rate": 4.13e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 3176 + }, + { + "loss": 0.0554, + "grad_norm": 1.5242044925689697, + "learning_rate": 4.125e-06, + "num_tokens": 1090270.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5885, + "step": 3177 + }, + { + "loss": 0.0397, + "grad_norm": 1.1341863870620728, + "learning_rate": 4.12e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.589, + "step": 3178 + }, + { + "loss": 0.0027, + "grad_norm": 0.36381402611732483, + "learning_rate": 4.115e-06, + "num_tokens": 1090873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5895000000000001, + "step": 3179 + }, + { + "loss": 0.0607, + "grad_norm": 1.1853790283203125, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5899999999999999, + "step": 3180 + }, + { + "loss": 0.0643, + "grad_norm": 1.3047658205032349, + "learning_rate": 4.1050000000000005e-06, + "num_tokens": 1091897.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5905, + "step": 3181 + }, + { + "loss": 0.0026, + "grad_norm": 0.35462620854377747, + "learning_rate": 4.1e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 1.0, + "epoch": 1.591, + "step": 3182 + }, + { + "loss": 0.0551, + "grad_norm": 1.313693642616272, + "learning_rate": 4.095e-06, + "num_tokens": 1092500.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.5915, + "step": 3183 + }, + { + "loss": 0.0476, + "grad_norm": 1.3256938457489014, + "learning_rate": 4.09e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.592, + "step": 3184 + }, + { + "loss": 0.0674, + "grad_norm": 1.4579592943191528, + "learning_rate": 4.085e-06, + "num_tokens": 1093524.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5925, + "step": 3185 + }, + { + "loss": 0.0654, + "grad_norm": 1.39744234085083, + "learning_rate": 4.08e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.593, + "step": 3186 + }, + { + "loss": 0.0024, + "grad_norm": 0.3426502048969269, + "learning_rate": 4.075e-06, + "num_tokens": 1094127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5935000000000001, + "step": 3187 + }, + { + "loss": 0.0025, + "grad_norm": 0.34538590908050537, + "learning_rate": 4.07e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 3188 + }, + { + "loss": 0.0023, + "grad_norm": 0.317192405462265, + "learning_rate": 4.065e-06, + "num_tokens": 1094309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5945, + "step": 3189 + }, + { + "loss": 0.067, + "grad_norm": 1.3644077777862549, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.595, + "step": 3190 + }, + { + "loss": 0.0403, + "grad_norm": 1.0108872652053833, + "learning_rate": 4.055000000000001e-06, + "num_tokens": 1095333.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5955, + "step": 3191 + }, + { + "loss": 0.0023, + "grad_norm": 0.32959794998168945, + "learning_rate": 4.05e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 1.0, + "epoch": 1.596, + "step": 3192 + }, + { + "loss": 0.0695, + "grad_norm": 1.4694541692733765, + "learning_rate": 4.045e-06, + "num_tokens": 1095936.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.5965, + "step": 3193 + }, + { + "loss": 0.0579, + "grad_norm": 1.4185339212417603, + "learning_rate": 4.04e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.597, + "step": 3194 + }, + { + "loss": 0.0023, + "grad_norm": 0.3271894156932831, + "learning_rate": 4.035e-06, + "num_tokens": 1096539.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5975000000000001, + "step": 3195 + }, + { + "loss": 0.0687, + "grad_norm": 1.3683706521987915, + "learning_rate": 4.03e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.5979999999999999, + "step": 3196 + }, + { + "loss": 0.0022, + "grad_norm": 0.3076697289943695, + "learning_rate": 4.0250000000000004e-06, + "num_tokens": 1097142.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5985, + "step": 3197 + }, + { + "loss": 0.0633, + "grad_norm": 1.3920204639434814, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.599, + "step": 3198 + }, + { + "loss": 0.0025, + "grad_norm": 0.340093195438385, + "learning_rate": 4.0150000000000005e-06, + "num_tokens": 1097745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5995, + "step": 3199 + }, + { + "loss": 0.0446, + "grad_norm": 1.343589186668396, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6, + "step": 3200 + }, + { + "loss": 0.0019, + "grad_norm": 0.27124884724617004, + "learning_rate": 4.005000000000001e-06, + "num_tokens": 1098348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6005, + "step": 3201 + }, + { + "loss": 0.0404, + "grad_norm": 0.9648232460021973, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.601, + "step": 3202 + }, + { + "loss": 0.0019, + "grad_norm": 0.27278977632522583, + "learning_rate": 3.995000000000001e-06, + "num_tokens": 1098951.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6015000000000001, + "step": 3203 + }, + { + "loss": 0.0376, + "grad_norm": 1.0787500143051147, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6019999999999999, + "step": 3204 + }, + { + "loss": 0.0528, + "grad_norm": 1.1423871517181396, + "learning_rate": 3.985000000000001e-06, + "num_tokens": 1099975.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6025, + "step": 3205 + }, + { + "loss": 0.0428, + "grad_norm": 1.0963202714920044, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.603, + "step": 3206 + }, + { + "loss": 0.0023, + "grad_norm": 0.3151981234550476, + "learning_rate": 3.975000000000001e-06, + "num_tokens": 1100578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6035, + "step": 3207 + }, + { + "loss": 0.0627, + "grad_norm": 1.3276523351669312, + "learning_rate": 3.97e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.604, + "step": 3208 + }, + { + "loss": 0.0644, + "grad_norm": 1.2610445022583008, + "learning_rate": 3.965e-06, + "num_tokens": 1101602.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6045, + "step": 3209 + }, + { + "loss": 0.0605, + "grad_norm": 1.5303077697753906, + "learning_rate": 3.96e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.605, + "step": 3210 + }, + { + "loss": 0.0428, + "grad_norm": 1.1033059358596802, + "learning_rate": 3.955e-06, + "num_tokens": 1102626.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6055000000000001, + "step": 3211 + }, + { + "loss": 0.0025, + "grad_norm": 0.3444884419441223, + "learning_rate": 3.95e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6059999999999999, + "step": 3212 + }, + { + "loss": 0.0021, + "grad_norm": 0.30967977643013, + "learning_rate": 3.945e-06, + "num_tokens": 1102808.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6065, + "step": 3213 + }, + { + "loss": 0.0023, + "grad_norm": 0.3297445774078369, + "learning_rate": 3.94e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 3214 + }, + { + "loss": 0.0389, + "grad_norm": 0.9863300323486328, + "learning_rate": 3.9350000000000004e-06, + "num_tokens": 1103411.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6075, + "step": 3215 + }, + { + "loss": 0.0024, + "grad_norm": 0.34737643599510193, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.608, + "step": 3216 + }, + { + "loss": 0.0636, + "grad_norm": 1.4206818342208862, + "learning_rate": 3.9250000000000005e-06, + "num_tokens": 1104014.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6085, + "step": 3217 + }, + { + "loss": 0.0635, + "grad_norm": 1.3302878141403198, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.609, + "step": 3218 + }, + { + "loss": 0.0023, + "grad_norm": 0.34072810411453247, + "learning_rate": 3.915000000000001e-06, + "num_tokens": 1104617.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6095000000000002, + "step": 3219 + }, + { + "loss": 0.0023, + "grad_norm": 0.324464350938797, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 3220 + }, + { + "loss": 0.041, + "grad_norm": 1.2196465730667114, + "learning_rate": 3.905000000000001e-06, + "num_tokens": 1105220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6105, + "step": 3221 + }, + { + "loss": 0.0609, + "grad_norm": 1.3683393001556396, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.611, + "step": 3222 + }, + { + "loss": 0.067, + "grad_norm": 1.3955715894699097, + "learning_rate": 3.895000000000001e-06, + "num_tokens": 1106244.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6115, + "step": 3223 + }, + { + "loss": 0.0681, + "grad_norm": 1.2971601486206055, + "learning_rate": 3.89e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.612, + "step": 3224 + }, + { + "loss": 0.0399, + "grad_norm": 0.9620857834815979, + "learning_rate": 3.885e-06, + "num_tokens": 1107268.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6125, + "step": 3225 + }, + { + "loss": 0.0563, + "grad_norm": 1.419252634048462, + "learning_rate": 3.88e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.613, + "step": 3226 + }, + { + "loss": 0.0025, + "grad_norm": 0.3523210883140564, + "learning_rate": 3.875e-06, + "num_tokens": 1107871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6135000000000002, + "step": 3227 + }, + { + "loss": 0.0025, + "grad_norm": 0.3481607437133789, + "learning_rate": 3.87e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 3228 + }, + { + "loss": 0.0668, + "grad_norm": 1.5234949588775635, + "learning_rate": 3.865e-06, + "num_tokens": 1108474.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6145, + "step": 3229 + }, + { + "loss": 0.065, + "grad_norm": 1.0866061449050903, + "learning_rate": 3.86e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.615, + "step": 3230 + }, + { + "loss": 0.0023, + "grad_norm": 0.32322317361831665, + "learning_rate": 3.855e-06, + "num_tokens": 1109077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6155, + "step": 3231 + }, + { + "loss": 0.0028, + "grad_norm": 0.3983127474784851, + "learning_rate": 3.85e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 3232 + }, + { + "loss": 0.0028, + "grad_norm": 0.3855290114879608, + "learning_rate": 3.8450000000000005e-06, + "num_tokens": 1109259.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6165, + "step": 3233 + }, + { + "loss": 0.0628, + "grad_norm": 1.2134065628051758, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.617, + "step": 3234 + }, + { + "loss": 0.0026, + "grad_norm": 0.3645097613334656, + "learning_rate": 3.8350000000000006e-06, + "num_tokens": 1109862.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6175000000000002, + "step": 3235 + }, + { + "loss": 0.0564, + "grad_norm": 1.3227709531784058, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6179999999999999, + "step": 3236 + }, + { + "loss": 0.0356, + "grad_norm": 1.1357544660568237, + "learning_rate": 3.825000000000001e-06, + "num_tokens": 1110886.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6185, + "step": 3237 + }, + { + "loss": 0.002, + "grad_norm": 0.2842106819152832, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.619, + "step": 3238 + }, + { + "loss": 0.0021, + "grad_norm": 0.2954864501953125, + "learning_rate": 3.815000000000001e-06, + "num_tokens": 1111068.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6195, + "step": 3239 + }, + { + "loss": 0.0535, + "grad_norm": 1.2989691495895386, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.62, + "step": 3240 + }, + { + "loss": 0.0633, + "grad_norm": 1.4842454195022583, + "learning_rate": 3.8050000000000004e-06, + "num_tokens": 1112092.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6205, + "step": 3241 + }, + { + "loss": 0.0613, + "grad_norm": 1.4029802083969116, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.621, + "step": 3242 + }, + { + "loss": 0.0021, + "grad_norm": 0.3039712905883789, + "learning_rate": 3.7950000000000005e-06, + "num_tokens": 1112695.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6215000000000002, + "step": 3243 + }, + { + "loss": 0.0564, + "grad_norm": 1.3126254081726074, + "learning_rate": 3.79e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6219999999999999, + "step": 3244 + }, + { + "loss": 0.0372, + "grad_norm": 1.1704014539718628, + "learning_rate": 3.785e-06, + "num_tokens": 1113719.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6225, + "step": 3245 + }, + { + "loss": 0.0438, + "grad_norm": 1.2828481197357178, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.623, + "step": 3246 + }, + { + "loss": 0.0023, + "grad_norm": 0.343226820230484, + "learning_rate": 3.7750000000000003e-06, + "num_tokens": 1114322.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6235, + "step": 3247 + }, + { + "loss": 0.0402, + "grad_norm": 1.072348952293396, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.624, + "step": 3248 + }, + { + "loss": 0.0372, + "grad_norm": 1.061455488204956, + "learning_rate": 3.7650000000000004e-06, + "num_tokens": 1115346.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6245, + "step": 3249 + }, + { + "loss": 0.0621, + "grad_norm": 1.3332241773605347, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.625, + "step": 3250 + }, + { + "loss": 0.0665, + "grad_norm": 1.4206236600875854, + "learning_rate": 3.7550000000000005e-06, + "num_tokens": 1116370.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6255, + "step": 3251 + }, + { + "loss": 0.0616, + "grad_norm": 1.5544387102127075, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 3252 + }, + { + "loss": 0.0024, + "grad_norm": 0.34623461961746216, + "learning_rate": 3.745e-06, + "num_tokens": 1116973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6265, + "step": 3253 + }, + { + "loss": 0.0611, + "grad_norm": 1.2223175764083862, + "learning_rate": 3.74e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.627, + "step": 3254 + }, + { + "loss": 0.0517, + "grad_norm": 1.338625192642212, + "learning_rate": 3.7350000000000002e-06, + "num_tokens": 1117997.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6275, + "step": 3255 + }, + { + "loss": 0.0567, + "grad_norm": 1.3747273683547974, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 3256 + }, + { + "loss": 0.0026, + "grad_norm": 0.36324965953826904, + "learning_rate": 3.7250000000000003e-06, + "num_tokens": 1118600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6284999999999998, + "step": 3257 + }, + { + "loss": 0.0025, + "grad_norm": 0.3447258472442627, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 3258 + }, + { + "loss": 0.0026, + "grad_norm": 0.36628466844558716, + "learning_rate": 3.7150000000000004e-06, + "num_tokens": 1118782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6295, + "step": 3259 + }, + { + "loss": 0.0535, + "grad_norm": 1.2702912092208862, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.63, + "step": 3260 + }, + { + "loss": 0.0026, + "grad_norm": 0.37140271067619324, + "learning_rate": 3.705e-06, + "num_tokens": 1119385.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6305, + "step": 3261 + }, + { + "loss": 0.003, + "grad_norm": 0.4019966721534729, + "learning_rate": 3.7e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 3262 + }, + { + "loss": 0.0669, + "grad_norm": 1.4418880939483643, + "learning_rate": 3.695e-06, + "num_tokens": 1119988.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6315, + "step": 3263 + }, + { + "loss": 0.0396, + "grad_norm": 1.2212142944335938, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6320000000000001, + "step": 3264 + }, + { + "loss": 0.0026, + "grad_norm": 0.37143605947494507, + "learning_rate": 3.6850000000000003e-06, + "num_tokens": 1120591.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6324999999999998, + "step": 3265 + }, + { + "loss": 0.0588, + "grad_norm": 1.3627078533172607, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.633, + "step": 3266 + }, + { + "loss": 0.0027, + "grad_norm": 0.3791561722755432, + "learning_rate": 3.6750000000000004e-06, + "num_tokens": 1121194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6335, + "step": 3267 + }, + { + "loss": 0.0567, + "grad_norm": 1.289622187614441, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.634, + "step": 3268 + }, + { + "loss": 0.0579, + "grad_norm": 1.220171332359314, + "learning_rate": 3.665e-06, + "num_tokens": 1122218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6345, + "step": 3269 + }, + { + "loss": 0.0543, + "grad_norm": 1.3633372783660889, + "learning_rate": 3.66e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.635, + "step": 3270 + }, + { + "loss": 0.0376, + "grad_norm": 1.1212244033813477, + "learning_rate": 3.655e-06, + "num_tokens": 1123242.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6355, + "step": 3271 + }, + { + "loss": 0.066, + "grad_norm": 1.352933645248413, + "learning_rate": 3.65e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6360000000000001, + "step": 3272 + }, + { + "loss": 0.0469, + "grad_norm": 1.09308922290802, + "learning_rate": 3.6450000000000003e-06, + "num_tokens": 1124266.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6364999999999998, + "step": 3273 + }, + { + "loss": 0.1411, + "grad_norm": 2.6187405586242676, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9491193890571594, + "epoch": 1.637, + "step": 3274 + }, + { + "loss": 0.0414, + "grad_norm": 1.162994146347046, + "learning_rate": 3.6350000000000003e-06, + "num_tokens": 1125290.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6375, + "step": 3275 + }, + { + "loss": 0.0028, + "grad_norm": 0.3896919786930084, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.638, + "step": 3276 + }, + { + "loss": 0.0026, + "grad_norm": 0.3726244270801544, + "learning_rate": 3.625e-06, + "num_tokens": 1125472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6385, + "step": 3277 + }, + { + "loss": 0.0026, + "grad_norm": 0.36463192105293274, + "learning_rate": 3.62e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 3278 + }, + { + "loss": 0.0507, + "grad_norm": 1.3470423221588135, + "learning_rate": 3.615e-06, + "num_tokens": 1126075.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6395, + "step": 3279 + }, + { + "loss": 0.0683, + "grad_norm": 1.4609153270721436, + "learning_rate": 3.61e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.6400000000000001, + "step": 3280 + }, + { + "loss": 0.0535, + "grad_norm": 1.1537185907363892, + "learning_rate": 3.6050000000000002e-06, + "num_tokens": 1127099.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6404999999999998, + "step": 3281 + }, + { + "loss": 0.0608, + "grad_norm": 1.3845043182373047, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.641, + "step": 3282 + }, + { + "loss": 0.0447, + "grad_norm": 1.212424397468567, + "learning_rate": 3.5950000000000003e-06, + "num_tokens": 1128123.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6415, + "step": 3283 + }, + { + "loss": 0.0026, + "grad_norm": 0.37876564264297485, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 1.0, + "epoch": 1.642, + "step": 3284 + }, + { + "loss": 0.0408, + "grad_norm": 1.2840468883514404, + "learning_rate": 3.585e-06, + "num_tokens": 1128726.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6425, + "step": 3285 + }, + { + "loss": 0.0386, + "grad_norm": 1.1343239545822144, + "learning_rate": 3.58e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.643, + "step": 3286 + }, + { + "loss": 0.0381, + "grad_norm": 1.1031399965286255, + "learning_rate": 3.575e-06, + "num_tokens": 1129750.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6435, + "step": 3287 + }, + { + "loss": 0.0728, + "grad_norm": 1.8012501001358032, + "learning_rate": 3.57e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.6440000000000001, + "step": 3288 + }, + { + "loss": 0.003, + "grad_norm": 0.42031532526016235, + "learning_rate": 3.565e-06, + "num_tokens": 1130353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6444999999999999, + "step": 3289 + }, + { + "loss": 0.0028, + "grad_norm": 0.42307499051094055, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 3290 + }, + { + "loss": 0.0656, + "grad_norm": 1.4206976890563965, + "learning_rate": 3.5550000000000003e-06, + "num_tokens": 1130956.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.6455, + "step": 3291 + }, + { + "loss": 0.0373, + "grad_norm": 1.0836045742034912, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.646, + "step": 3292 + }, + { + "loss": 0.0666, + "grad_norm": 1.4353013038635254, + "learning_rate": 3.545e-06, + "num_tokens": 1131980.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6465, + "step": 3293 + }, + { + "loss": 0.0033, + "grad_norm": 0.48532357811927795, + "learning_rate": 3.54e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.647, + "step": 3294 + }, + { + "loss": 0.0032, + "grad_norm": 0.4415268898010254, + "learning_rate": 3.535e-06, + "num_tokens": 1132162.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6475, + "step": 3295 + }, + { + "loss": 0.0029, + "grad_norm": 0.41665494441986084, + "learning_rate": 3.53e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 3296 + }, + { + "loss": 0.0638, + "grad_norm": 1.2469731569290161, + "learning_rate": 3.525e-06, + "num_tokens": 1132765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6484999999999999, + "step": 3297 + }, + { + "loss": 0.0614, + "grad_norm": 1.251099944114685, + "learning_rate": 3.52e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.649, + "step": 3298 + }, + { + "loss": 0.0027, + "grad_norm": 0.39604058861732483, + "learning_rate": 3.5150000000000002e-06, + "num_tokens": 1133368.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6495, + "step": 3299 + }, + { + "loss": 0.0588, + "grad_norm": 1.0699150562286377, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.65, + "step": 3300 + }, + { + "loss": 0.0583, + "grad_norm": 1.2757554054260254, + "learning_rate": 3.505e-06, + "num_tokens": 1134392.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6505, + "step": 3301 + }, + { + "loss": 0.0401, + "grad_norm": 1.3257462978363037, + "learning_rate": 3.5e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.651, + "step": 3302 + }, + { + "loss": 0.0643, + "grad_norm": 1.4011600017547607, + "learning_rate": 3.495e-06, + "num_tokens": 1135416.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6515, + "step": 3303 + }, + { + "loss": 0.0587, + "grad_norm": 1.5523959398269653, + "learning_rate": 3.49e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6520000000000001, + "step": 3304 + }, + { + "loss": 0.0602, + "grad_norm": 1.1153236627578735, + "learning_rate": 3.485e-06, + "num_tokens": 1136440.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6524999999999999, + "step": 3305 + }, + { + "loss": 0.0032, + "grad_norm": 0.4743506610393524, + "learning_rate": 3.48e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 1.0, + "epoch": 1.653, + "step": 3306 + }, + { + "loss": 0.0032, + "grad_norm": 0.44705691933631897, + "learning_rate": 3.475e-06, + "num_tokens": 1136622.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6535, + "step": 3307 + }, + { + "loss": 0.0627, + "grad_norm": 1.376706838607788, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.654, + "step": 3308 + }, + { + "loss": 0.0578, + "grad_norm": 1.3461076021194458, + "learning_rate": 3.465e-06, + "num_tokens": 1137646.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6545, + "step": 3309 + }, + { + "loss": 0.0028, + "grad_norm": 0.4053739011287689, + "learning_rate": 3.46e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 1.0, + "epoch": 1.655, + "step": 3310 + }, + { + "loss": 0.0028, + "grad_norm": 0.4151926636695862, + "learning_rate": 3.455e-06, + "num_tokens": 1137828.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6555, + "step": 3311 + }, + { + "loss": 0.003, + "grad_norm": 0.42436280846595764, + "learning_rate": 3.45e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 3312 + }, + { + "loss": 0.0029, + "grad_norm": 0.41050389409065247, + "learning_rate": 3.445e-06, + "num_tokens": 1138010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6564999999999999, + "step": 3313 + }, + { + "loss": 0.0562, + "grad_norm": 1.2650190591812134, + "learning_rate": 3.44e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.657, + "step": 3314 + }, + { + "loss": 0.0558, + "grad_norm": 1.1567943096160889, + "learning_rate": 3.4350000000000006e-06, + "num_tokens": 1139034.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6575, + "step": 3315 + }, + { + "loss": 0.0413, + "grad_norm": 1.3011746406555176, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.658, + "step": 3316 + }, + { + "loss": 0.0569, + "grad_norm": 1.4117727279663086, + "learning_rate": 3.4250000000000007e-06, + "num_tokens": 1140058.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6585, + "step": 3317 + }, + { + "loss": 0.0027, + "grad_norm": 0.3829484283924103, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.659, + "step": 3318 + }, + { + "loss": 0.0516, + "grad_norm": 1.152258038520813, + "learning_rate": 3.4150000000000003e-06, + "num_tokens": 1140661.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6595, + "step": 3319 + }, + { + "loss": 0.0396, + "grad_norm": 1.20711088180542, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 3320 + }, + { + "loss": 0.0522, + "grad_norm": 1.251099705696106, + "learning_rate": 3.4050000000000004e-06, + "num_tokens": 1141685.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6604999999999999, + "step": 3321 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730953454971313, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.661, + "step": 3322 + }, + { + "loss": 0.0613, + "grad_norm": 1.5974045991897583, + "learning_rate": 3.3950000000000005e-06, + "num_tokens": 1142709.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6615, + "step": 3323 + }, + { + "loss": 0.0522, + "grad_norm": 1.416182518005371, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.662, + "step": 3324 + }, + { + "loss": 0.0595, + "grad_norm": 1.381279706954956, + "learning_rate": 3.3850000000000006e-06, + "num_tokens": 1143733.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6625, + "step": 3325 + }, + { + "loss": 0.0563, + "grad_norm": 1.2484899759292603, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.663, + "step": 3326 + }, + { + "loss": 0.0029, + "grad_norm": 0.41797107458114624, + "learning_rate": 3.3750000000000003e-06, + "num_tokens": 1144336.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6635, + "step": 3327 + }, + { + "loss": 0.0027, + "grad_norm": 0.39544638991355896, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 3328 + }, + { + "loss": 0.0371, + "grad_norm": 1.0045322179794312, + "learning_rate": 3.3650000000000004e-06, + "num_tokens": 1144939.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6644999999999999, + "step": 3329 + }, + { + "loss": 0.0671, + "grad_norm": 1.530097246170044, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 3330 + }, + { + "loss": 0.0529, + "grad_norm": 1.179215669631958, + "learning_rate": 3.3550000000000005e-06, + "num_tokens": 1145963.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6655, + "step": 3331 + }, + { + "loss": 0.0033, + "grad_norm": 0.46830442547798157, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 1.0, + "epoch": 1.666, + "step": 3332 + }, + { + "loss": 0.0031, + "grad_norm": 0.44680675864219666, + "learning_rate": 3.3450000000000006e-06, + "num_tokens": 1146145.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6665, + "step": 3333 + }, + { + "loss": 0.0591, + "grad_norm": 2.0427138805389404, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.667, + "step": 3334 + }, + { + "loss": 0.0446, + "grad_norm": 1.0700162649154663, + "learning_rate": 3.3350000000000003e-06, + "num_tokens": 1147169.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.6675, + "step": 3335 + }, + { + "loss": 0.0352, + "grad_norm": 0.953519344329834, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6680000000000001, + "step": 3336 + }, + { + "loss": 0.0402, + "grad_norm": 1.208362102508545, + "learning_rate": 3.3250000000000004e-06, + "num_tokens": 1148193.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6684999999999999, + "step": 3337 + }, + { + "loss": 0.0034, + "grad_norm": 0.48497405648231506, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 1.0, + "epoch": 1.669, + "step": 3338 + }, + { + "loss": 0.0031, + "grad_norm": 0.4533288776874542, + "learning_rate": 3.3150000000000004e-06, + "num_tokens": 1148375.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6695, + "step": 3339 + }, + { + "loss": 0.0531, + "grad_norm": 1.031333088874817, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.67, + "step": 3340 + }, + { + "loss": 0.0029, + "grad_norm": 0.40945783257484436, + "learning_rate": 3.3050000000000005e-06, + "num_tokens": 1148978.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6705, + "step": 3341 + }, + { + "loss": 0.0643, + "grad_norm": 1.0990197658538818, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.671, + "step": 3342 + }, + { + "loss": 0.0379, + "grad_norm": 1.0483911037445068, + "learning_rate": 3.2950000000000002e-06, + "num_tokens": 1150002.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6715, + "step": 3343 + }, + { + "loss": 0.0489, + "grad_norm": 1.0835374593734741, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6720000000000002, + "step": 3344 + }, + { + "loss": 0.0033, + "grad_norm": 0.4901528060436249, + "learning_rate": 3.2850000000000003e-06, + "num_tokens": 1150605.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6724999999999999, + "step": 3345 + }, + { + "loss": 0.0029, + "grad_norm": 0.41757330298423767, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 3346 + }, + { + "loss": 0.0379, + "grad_norm": 0.9371951818466187, + "learning_rate": 3.2750000000000004e-06, + "num_tokens": 1151208.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6735, + "step": 3347 + }, + { + "loss": 0.0397, + "grad_norm": 1.0155102014541626, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.674, + "step": 3348 + }, + { + "loss": 0.0027, + "grad_norm": 0.3897286653518677, + "learning_rate": 3.2650000000000005e-06, + "num_tokens": 1151811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6745, + "step": 3349 + }, + { + "loss": 0.0028, + "grad_norm": 0.4042399525642395, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 3350 + }, + { + "loss": 0.003, + "grad_norm": 0.43666109442710876, + "learning_rate": 3.255e-06, + "num_tokens": 1151993.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6755, + "step": 3351 + }, + { + "loss": 0.0029, + "grad_norm": 0.42103472352027893, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 3352 + }, + { + "loss": 0.0028, + "grad_norm": 0.41361838579177856, + "learning_rate": 3.2450000000000003e-06, + "num_tokens": 1152175.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6764999999999999, + "step": 3353 + }, + { + "loss": 0.0357, + "grad_norm": 0.9301024675369263, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.677, + "step": 3354 + }, + { + "loss": 0.0025, + "grad_norm": 0.3655649721622467, + "learning_rate": 3.2350000000000004e-06, + "num_tokens": 1152778.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6775, + "step": 3355 + }, + { + "loss": 0.0363, + "grad_norm": 1.0852001905441284, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.678, + "step": 3356 + }, + { + "loss": 0.0021, + "grad_norm": 0.3051436245441437, + "learning_rate": 3.2250000000000005e-06, + "num_tokens": 1153381.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6785, + "step": 3357 + }, + { + "loss": 0.0025, + "grad_norm": 0.38162630796432495, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 3358 + }, + { + "loss": 0.0022, + "grad_norm": 0.33861595392227173, + "learning_rate": 3.215e-06, + "num_tokens": 1153563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6795, + "step": 3359 + }, + { + "loss": 0.0021, + "grad_norm": 0.311531126499176, + "learning_rate": 3.21e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 3360 + }, + { + "loss": 0.002, + "grad_norm": 0.30146220326423645, + "learning_rate": 3.2050000000000002e-06, + "num_tokens": 1153745.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6804999999999999, + "step": 3361 + }, + { + "loss": 0.0019, + "grad_norm": 0.28205639123916626, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 3362 + }, + { + "loss": 0.0483, + "grad_norm": 1.185204029083252, + "learning_rate": 3.1950000000000003e-06, + "num_tokens": 1154348.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6815, + "step": 3363 + }, + { + "loss": 0.0705, + "grad_norm": 1.442715048789978, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.682, + "step": 3364 + }, + { + "loss": 0.059, + "grad_norm": 1.5234472751617432, + "learning_rate": 3.1850000000000004e-06, + "num_tokens": 1155372.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6825, + "step": 3365 + }, + { + "loss": 0.0712, + "grad_norm": 1.9519693851470947, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.683, + "step": 3366 + }, + { + "loss": 0.041, + "grad_norm": 1.0349758863449097, + "learning_rate": 3.175e-06, + "num_tokens": 1156396.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6835, + "step": 3367 + }, + { + "loss": 0.0423, + "grad_norm": 1.263643503189087, + "learning_rate": 3.17e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.6840000000000002, + "step": 3368 + }, + { + "loss": 0.0015, + "grad_norm": 0.21718572080135345, + "learning_rate": 3.165e-06, + "num_tokens": 1156999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6844999999999999, + "step": 3369 + }, + { + "loss": 0.0612, + "grad_norm": 1.4974867105484009, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.685, + "step": 3370 + }, + { + "loss": 0.0684, + "grad_norm": 1.3690571784973145, + "learning_rate": 3.1550000000000003e-06, + "num_tokens": 1158023.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6855, + "step": 3371 + }, + { + "loss": 0.0015, + "grad_norm": 0.22092363238334656, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 1.0, + "epoch": 1.686, + "step": 3372 + }, + { + "loss": 0.0466, + "grad_norm": 1.359930157661438, + "learning_rate": 3.1450000000000004e-06, + "num_tokens": 1158626.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6865, + "step": 3373 + }, + { + "loss": 0.0017, + "grad_norm": 0.23505748808383942, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 1.0, + "epoch": 1.687, + "step": 3374 + }, + { + "loss": 0.0412, + "grad_norm": 1.154797077178955, + "learning_rate": 3.135e-06, + "num_tokens": 1159229.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6875, + "step": 3375 + }, + { + "loss": 0.0688, + "grad_norm": 1.5609385967254639, + "learning_rate": 3.13e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.688, + "step": 3376 + }, + { + "loss": 0.0689, + "grad_norm": 1.9219101667404175, + "learning_rate": 3.125e-06, + "num_tokens": 1160253.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.6885, + "step": 3377 + }, + { + "loss": 0.0528, + "grad_norm": 1.4017720222473145, + "learning_rate": 3.12e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.689, + "step": 3378 + }, + { + "loss": 0.0018, + "grad_norm": 0.2644074261188507, + "learning_rate": 3.1150000000000002e-06, + "num_tokens": 1160856.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6895, + "step": 3379 + }, + { + "loss": 0.0359, + "grad_norm": 1.1351364850997925, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.69, + "step": 3380 + }, + { + "loss": 0.0561, + "grad_norm": 1.2852329015731812, + "learning_rate": 3.1050000000000003e-06, + "num_tokens": 1161880.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.6905000000000001, + "step": 3381 + }, + { + "loss": 0.0019, + "grad_norm": 0.2809182107448578, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6909999999999998, + "step": 3382 + }, + { + "loss": 0.0019, + "grad_norm": 0.2629799544811249, + "learning_rate": 3.0950000000000004e-06, + "num_tokens": 1162062.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6915, + "step": 3383 + }, + { + "loss": 0.0583, + "grad_norm": 1.3401031494140625, + "learning_rate": 3.09e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.692, + "step": 3384 + }, + { + "loss": 0.0019, + "grad_norm": 0.2741340398788452, + "learning_rate": 3.085e-06, + "num_tokens": 1162665.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6925, + "step": 3385 + }, + { + "loss": 0.0019, + "grad_norm": 0.2670257091522217, + "learning_rate": 3.08e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 3386 + }, + { + "loss": 0.0529, + "grad_norm": 0.9913851022720337, + "learning_rate": 3.075e-06, + "num_tokens": 1163268.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.6935, + "step": 3387 + }, + { + "loss": 0.0018, + "grad_norm": 0.2675456404685974, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 1.0, + "epoch": 1.694, + "step": 3388 + }, + { + "loss": 0.0405, + "grad_norm": 1.6220101118087769, + "learning_rate": 3.0650000000000003e-06, + "num_tokens": 1163871.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6945000000000001, + "step": 3389 + }, + { + "loss": 0.0478, + "grad_norm": 1.0595648288726807, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6949999999999998, + "step": 3390 + }, + { + "loss": 0.0022, + "grad_norm": 0.3088478446006775, + "learning_rate": 3.0550000000000004e-06, + "num_tokens": 1164474.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6955, + "step": 3391 + }, + { + "loss": 0.0501, + "grad_norm": 1.3393687009811401, + "learning_rate": 3.05e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.696, + "step": 3392 + }, + { + "loss": 0.0019, + "grad_norm": 0.2677120566368103, + "learning_rate": 3.045e-06, + "num_tokens": 1165077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6965, + "step": 3393 + }, + { + "loss": 0.0519, + "grad_norm": 1.1974607706069946, + "learning_rate": 3.04e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.697, + "step": 3394 + }, + { + "loss": 0.0406, + "grad_norm": 1.0820717811584473, + "learning_rate": 3.035e-06, + "num_tokens": 1166101.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.6975, + "step": 3395 + }, + { + "loss": 0.002, + "grad_norm": 0.2836916148662567, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.698, + "step": 3396 + }, + { + "loss": 0.002, + "grad_norm": 0.2837901711463928, + "learning_rate": 3.0250000000000003e-06, + "num_tokens": 1166283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6985000000000001, + "step": 3397 + }, + { + "loss": 0.0546, + "grad_norm": 1.4433382749557495, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6989999999999998, + "step": 3398 + }, + { + "loss": 0.0021, + "grad_norm": 0.2978130877017975, + "learning_rate": 3.0150000000000004e-06, + "num_tokens": 1166886.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6995, + "step": 3399 + }, + { + "loss": 0.002, + "grad_norm": 0.2806030511856079, + "learning_rate": 3.01e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 3400 + }, + { + "loss": 0.0636, + "grad_norm": 1.3879796266555786, + "learning_rate": 3.005e-06, + "num_tokens": 1167489.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7005, + "step": 3401 + }, + { + "loss": 0.002, + "grad_norm": 0.2759900689125061, + "learning_rate": 3e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 1.0, + "epoch": 1.701, + "step": 3402 + }, + { + "loss": 0.0574, + "grad_norm": 1.3505700826644897, + "learning_rate": 2.995e-06, + "num_tokens": 1168092.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7015, + "step": 3403 + }, + { + "loss": 0.0554, + "grad_norm": 1.4108113050460815, + "learning_rate": 2.99e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.702, + "step": 3404 + }, + { + "loss": 0.0558, + "grad_norm": 1.5085475444793701, + "learning_rate": 2.9850000000000002e-06, + "num_tokens": 1169116.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7025000000000001, + "step": 3405 + }, + { + "loss": 0.0019, + "grad_norm": 0.2683292031288147, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7029999999999998, + "step": 3406 + }, + { + "loss": 0.0367, + "grad_norm": 1.1768198013305664, + "learning_rate": 2.9750000000000003e-06, + "num_tokens": 1169719.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7035, + "step": 3407 + }, + { + "loss": 0.002, + "grad_norm": 0.2821144759654999, + "learning_rate": 2.97e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 1.0, + "epoch": 1.704, + "step": 3408 + }, + { + "loss": 0.0018, + "grad_norm": 0.26630160212516785, + "learning_rate": 2.965e-06, + "num_tokens": 1169901.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7045, + "step": 3409 + }, + { + "loss": 0.0018, + "grad_norm": 0.2571128308773041, + "learning_rate": 2.96e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 3410 + }, + { + "loss": 0.002, + "grad_norm": 0.28111621737480164, + "learning_rate": 2.955e-06, + "num_tokens": 1170083.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7055, + "step": 3411 + }, + { + "loss": 0.002, + "grad_norm": 0.27419018745422363, + "learning_rate": 2.95e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 3412 + }, + { + "loss": 0.0019, + "grad_norm": 0.26888176798820496, + "learning_rate": 2.945e-06, + "num_tokens": 1170265.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7065000000000001, + "step": 3413 + }, + { + "loss": 0.0018, + "grad_norm": 0.2536250352859497, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 3414 + }, + { + "loss": 0.0018, + "grad_norm": 0.24844178557395935, + "learning_rate": 2.9350000000000003e-06, + "num_tokens": 1170447.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7075, + "step": 3415 + }, + { + "loss": 0.0487, + "grad_norm": 1.4517875909805298, + "learning_rate": 2.93e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.708, + "step": 3416 + }, + { + "loss": 0.0564, + "grad_norm": 1.2101439237594604, + "learning_rate": 2.925e-06, + "num_tokens": 1171471.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7085, + "step": 3417 + }, + { + "loss": 0.043, + "grad_norm": 1.1227502822875977, + "learning_rate": 2.92e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.709, + "step": 3418 + }, + { + "loss": 0.0556, + "grad_norm": 1.1113651990890503, + "learning_rate": 2.915e-06, + "num_tokens": 1172495.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7095, + "step": 3419 + }, + { + "loss": 0.0015, + "grad_norm": 0.21050438284873962, + "learning_rate": 2.91e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 1.0, + "epoch": 1.71, + "step": 3420 + }, + { + "loss": 0.0492, + "grad_norm": 1.136242389678955, + "learning_rate": 2.905e-06, + "num_tokens": 1173098.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7105000000000001, + "step": 3421 + }, + { + "loss": 0.0549, + "grad_norm": 1.1831704378128052, + "learning_rate": 2.9e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7109999999999999, + "step": 3422 + }, + { + "loss": 0.0589, + "grad_norm": 1.318955659866333, + "learning_rate": 2.8950000000000002e-06, + "num_tokens": 1174122.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7115, + "step": 3423 + }, + { + "loss": 0.0385, + "grad_norm": 1.1089059114456177, + "learning_rate": 2.89e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.712, + "step": 3424 + }, + { + "loss": 0.0017, + "grad_norm": 0.24754203855991364, + "learning_rate": 2.885e-06, + "num_tokens": 1174725.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7125, + "step": 3425 + }, + { + "loss": 0.0563, + "grad_norm": 1.1799119710922241, + "learning_rate": 2.88e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.713, + "step": 3426 + }, + { + "loss": 0.0017, + "grad_norm": 0.2318888157606125, + "learning_rate": 2.875e-06, + "num_tokens": 1175328.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7135, + "step": 3427 + }, + { + "loss": 0.0623, + "grad_norm": 1.3154571056365967, + "learning_rate": 2.87e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.714, + "step": 3428 + }, + { + "loss": 0.0019, + "grad_norm": 0.26307183504104614, + "learning_rate": 2.865e-06, + "num_tokens": 1175931.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7145000000000001, + "step": 3429 + }, + { + "loss": 0.0018, + "grad_norm": 0.2589333653450012, + "learning_rate": 2.86e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 3430 + }, + { + "loss": 0.0504, + "grad_norm": 1.4614155292510986, + "learning_rate": 2.855e-06, + "num_tokens": 1176534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7155, + "step": 3431 + }, + { + "loss": 0.0018, + "grad_norm": 0.2591991722583771, + "learning_rate": 2.85e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 1.0, + "epoch": 1.716, + "step": 3432 + }, + { + "loss": 0.0018, + "grad_norm": 0.25856250524520874, + "learning_rate": 2.845e-06, + "num_tokens": 1176716.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7165, + "step": 3433 + }, + { + "loss": 0.0368, + "grad_norm": 1.2794378995895386, + "learning_rate": 2.84e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.717, + "step": 3434 + }, + { + "loss": 0.0595, + "grad_norm": 1.1754332780838013, + "learning_rate": 2.835e-06, + "num_tokens": 1177740.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7175, + "step": 3435 + }, + { + "loss": 0.0016, + "grad_norm": 0.218499094247818, + "learning_rate": 2.83e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 1.0, + "epoch": 1.718, + "step": 3436 + }, + { + "loss": 0.0562, + "grad_norm": 1.4319361448287964, + "learning_rate": 2.825e-06, + "num_tokens": 1178343.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7185000000000001, + "step": 3437 + }, + { + "loss": 0.0548, + "grad_norm": 1.1614960432052612, + "learning_rate": 2.82e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7189999999999999, + "step": 3438 + }, + { + "loss": 0.0634, + "grad_norm": 1.559000849723816, + "learning_rate": 2.815e-06, + "num_tokens": 1179367.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7195, + "step": 3439 + }, + { + "loss": 0.0593, + "grad_norm": 1.1891441345214844, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 3440 + }, + { + "loss": 0.0638, + "grad_norm": 1.2654136419296265, + "learning_rate": 2.8050000000000007e-06, + "num_tokens": 1180391.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7205, + "step": 3441 + }, + { + "loss": 0.0411, + "grad_norm": 1.2888840436935425, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.721, + "step": 3442 + }, + { + "loss": 0.002, + "grad_norm": 0.2810196280479431, + "learning_rate": 2.7950000000000003e-06, + "num_tokens": 1180994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7215, + "step": 3443 + }, + { + "loss": 0.0393, + "grad_norm": 1.1534147262573242, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.722, + "step": 3444 + }, + { + "loss": 0.0019, + "grad_norm": 0.2703098952770233, + "learning_rate": 2.7850000000000004e-06, + "num_tokens": 1181597.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7225000000000001, + "step": 3445 + }, + { + "loss": 0.0612, + "grad_norm": 1.2400104999542236, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7229999999999999, + "step": 3446 + }, + { + "loss": 0.0019, + "grad_norm": 0.27535656094551086, + "learning_rate": 2.7750000000000005e-06, + "num_tokens": 1182200.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7235, + "step": 3447 + }, + { + "loss": 0.002, + "grad_norm": 0.2844158411026001, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 3448 + }, + { + "loss": 0.002, + "grad_norm": 0.2850154936313629, + "learning_rate": 2.7650000000000006e-06, + "num_tokens": 1182382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7245, + "step": 3449 + }, + { + "loss": 0.0018, + "grad_norm": 0.26619744300842285, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 3450 + }, + { + "loss": 0.0019, + "grad_norm": 0.2684476971626282, + "learning_rate": 2.7550000000000003e-06, + "num_tokens": 1182564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7255, + "step": 3451 + }, + { + "loss": 0.0577, + "grad_norm": 1.3094863891601562, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.726, + "step": 3452 + }, + { + "loss": 0.0378, + "grad_norm": 1.201589822769165, + "learning_rate": 2.7450000000000004e-06, + "num_tokens": 1183588.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7265000000000001, + "step": 3453 + }, + { + "loss": 0.0537, + "grad_norm": 1.2897847890853882, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7269999999999999, + "step": 3454 + }, + { + "loss": 0.0021, + "grad_norm": 0.2792169749736786, + "learning_rate": 2.7350000000000005e-06, + "num_tokens": 1184191.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7275, + "step": 3455 + }, + { + "loss": 0.002, + "grad_norm": 0.28593137860298157, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 3456 + }, + { + "loss": 0.058, + "grad_norm": 1.3839404582977295, + "learning_rate": 2.7250000000000006e-06, + "num_tokens": 1184794.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7285, + "step": 3457 + }, + { + "loss": 0.0018, + "grad_norm": 0.2617915868759155, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 1.0, + "epoch": 1.729, + "step": 3458 + }, + { + "loss": 0.0019, + "grad_norm": 0.2803640067577362, + "learning_rate": 2.7150000000000003e-06, + "num_tokens": 1184976.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7295, + "step": 3459 + }, + { + "loss": 0.0389, + "grad_norm": 1.0974253416061401, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.73, + "step": 3460 + }, + { + "loss": 0.0017, + "grad_norm": 0.24105492234230042, + "learning_rate": 2.7050000000000004e-06, + "num_tokens": 1185579.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7305000000000001, + "step": 3461 + }, + { + "loss": 0.0017, + "grad_norm": 0.2462151199579239, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 3462 + }, + { + "loss": 0.0681, + "grad_norm": 2.0248329639434814, + "learning_rate": 2.6950000000000005e-06, + "num_tokens": 1186182.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7315, + "step": 3463 + }, + { + "loss": 0.0506, + "grad_norm": 1.0506778955459595, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.732, + "step": 3464 + }, + { + "loss": 0.0414, + "grad_norm": 1.1461181640625, + "learning_rate": 2.6850000000000006e-06, + "num_tokens": 1187206.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7325, + "step": 3465 + }, + { + "loss": 0.002, + "grad_norm": 0.29532936215400696, + "learning_rate": 2.68e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 1.0, + "epoch": 1.733, + "step": 3466 + }, + { + "loss": 0.0018, + "grad_norm": 0.2511617839336395, + "learning_rate": 2.6750000000000002e-06, + "num_tokens": 1187388.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7335, + "step": 3467 + }, + { + "loss": 0.0017, + "grad_norm": 0.24015438556671143, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 3468 + }, + { + "loss": 0.0394, + "grad_norm": 1.186040997505188, + "learning_rate": 2.6650000000000003e-06, + "num_tokens": 1187991.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7345000000000002, + "step": 3469 + }, + { + "loss": 0.0516, + "grad_norm": 1.3716928958892822, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7349999999999999, + "step": 3470 + }, + { + "loss": 0.0017, + "grad_norm": 0.24118225276470184, + "learning_rate": 2.6550000000000004e-06, + "num_tokens": 1188594.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7355, + "step": 3471 + }, + { + "loss": 0.0634, + "grad_norm": 1.3280280828475952, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.736, + "step": 3472 + }, + { + "loss": 0.0606, + "grad_norm": 1.5957295894622803, + "learning_rate": 2.6450000000000005e-06, + "num_tokens": 1189618.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7365, + "step": 3473 + }, + { + "loss": 0.0019, + "grad_norm": 0.26652151346206665, + "learning_rate": 2.64e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 1.0, + "epoch": 1.737, + "step": 3474 + }, + { + "loss": 0.0465, + "grad_norm": 1.2865381240844727, + "learning_rate": 2.635e-06, + "num_tokens": 1190221.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7375, + "step": 3475 + }, + { + "loss": 0.0696, + "grad_norm": 1.5268961191177368, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.738, + "step": 3476 + }, + { + "loss": 0.0016, + "grad_norm": 0.22352814674377441, + "learning_rate": 2.6250000000000003e-06, + "num_tokens": 1190824.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7385000000000002, + "step": 3477 + }, + { + "loss": 0.0398, + "grad_norm": 1.0832366943359375, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7389999999999999, + "step": 3478 + }, + { + "loss": 0.002, + "grad_norm": 0.2866823971271515, + "learning_rate": 2.6150000000000004e-06, + "num_tokens": 1191427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7395, + "step": 3479 + }, + { + "loss": 0.0017, + "grad_norm": 0.25320085883140564, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 3480 + }, + { + "loss": 0.0554, + "grad_norm": 1.305580496788025, + "learning_rate": 2.6050000000000005e-06, + "num_tokens": 1192030.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7405, + "step": 3481 + }, + { + "loss": 0.053, + "grad_norm": 1.3485558032989502, + "learning_rate": 2.6e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 3482 + }, + { + "loss": 0.0597, + "grad_norm": 1.3094996213912964, + "learning_rate": 2.595e-06, + "num_tokens": 1193054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7415, + "step": 3483 + }, + { + "loss": 0.0361, + "grad_norm": 1.02549409866333, + "learning_rate": 2.59e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.742, + "step": 3484 + }, + { + "loss": 0.0549, + "grad_norm": 1.1604732275009155, + "learning_rate": 2.5850000000000002e-06, + "num_tokens": 1194078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7425000000000002, + "step": 3485 + }, + { + "loss": 0.0578, + "grad_norm": 1.1389886140823364, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7429999999999999, + "step": 3486 + }, + { + "loss": 0.0383, + "grad_norm": 1.1444112062454224, + "learning_rate": 2.5750000000000003e-06, + "num_tokens": 1195102.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7435, + "step": 3487 + }, + { + "loss": 0.0363, + "grad_norm": 1.2686033248901367, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 3488 + }, + { + "loss": 0.0609, + "grad_norm": 1.2078722715377808, + "learning_rate": 2.5650000000000004e-06, + "num_tokens": 1196126.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7445, + "step": 3489 + }, + { + "loss": 0.0019, + "grad_norm": 0.2754855155944824, + "learning_rate": 2.56e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 1.0, + "epoch": 1.745, + "step": 3490 + }, + { + "loss": 0.063, + "grad_norm": 1.346100091934204, + "learning_rate": 2.555e-06, + "num_tokens": 1196729.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7455, + "step": 3491 + }, + { + "loss": 0.0625, + "grad_norm": 1.3309886455535889, + "learning_rate": 2.55e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.746, + "step": 3492 + }, + { + "loss": 0.0023, + "grad_norm": 0.3301111161708832, + "learning_rate": 2.545e-06, + "num_tokens": 1197332.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7465000000000002, + "step": 3493 + }, + { + "loss": 0.0382, + "grad_norm": 1.0473533868789673, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7469999999999999, + "step": 3494 + }, + { + "loss": 0.0625, + "grad_norm": 1.2907440662384033, + "learning_rate": 2.5350000000000003e-06, + "num_tokens": 1198356.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7475, + "step": 3495 + }, + { + "loss": 0.0412, + "grad_norm": 1.1875349283218384, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.748, + "step": 3496 + }, + { + "loss": 0.1176, + "grad_norm": 2.9710206985473633, + "learning_rate": 2.5250000000000004e-06, + "num_tokens": 1199380.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.7485, + "step": 3497 + }, + { + "loss": 0.0026, + "grad_norm": 0.36476898193359375, + "learning_rate": 2.52e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 1.0, + "epoch": 1.749, + "step": 3498 + }, + { + "loss": 0.0379, + "grad_norm": 1.0208238363265991, + "learning_rate": 2.515e-06, + "num_tokens": 1199983.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7495, + "step": 3499 + }, + { + "loss": 0.0026, + "grad_norm": 0.37356528639793396, + "learning_rate": 2.51e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 1.0, + "epoch": 1.75, + "step": 3500 + }, + { + "loss": 0.0027, + "grad_norm": 0.39622190594673157, + "learning_rate": 2.505e-06, + "num_tokens": 1200165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7505, + "step": 3501 + }, + { + "loss": 0.0372, + "grad_norm": 1.0979310274124146, + "learning_rate": 2.5e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.751, + "step": 3502 + }, + { + "loss": 0.0362, + "grad_norm": 1.0418155193328857, + "learning_rate": 2.4950000000000003e-06, + "num_tokens": 1201189.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7515, + "step": 3503 + }, + { + "loss": 0.0632, + "grad_norm": 1.6260945796966553, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.752, + "step": 3504 + }, + { + "loss": 0.0029, + "grad_norm": 0.3957514762878418, + "learning_rate": 2.4850000000000003e-06, + "num_tokens": 1201792.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7525, + "step": 3505 + }, + { + "loss": 0.0024, + "grad_norm": 0.3393152356147766, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 3506 + }, + { + "loss": 0.0515, + "grad_norm": 1.1930348873138428, + "learning_rate": 2.475e-06, + "num_tokens": 1202395.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7534999999999998, + "step": 3507 + }, + { + "loss": 0.0026, + "grad_norm": 0.380045086145401, + "learning_rate": 2.47e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 1.0, + "epoch": 1.754, + "step": 3508 + }, + { + "loss": 0.0027, + "grad_norm": 0.3971390724182129, + "learning_rate": 2.465e-06, + "num_tokens": 1202577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7545, + "step": 3509 + }, + { + "loss": 0.0028, + "grad_norm": 0.38638150691986084, + "learning_rate": 2.46e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 3510 + }, + { + "loss": 0.0615, + "grad_norm": 1.3876094818115234, + "learning_rate": 2.4550000000000002e-06, + "num_tokens": 1203180.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7555, + "step": 3511 + }, + { + "loss": 0.0432, + "grad_norm": 1.4136366844177246, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.756, + "step": 3512 + }, + { + "loss": 0.0024, + "grad_norm": 0.34141626954078674, + "learning_rate": 2.4450000000000003e-06, + "num_tokens": 1203783.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7565, + "step": 3513 + }, + { + "loss": 0.0566, + "grad_norm": 1.0875115394592285, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7570000000000001, + "step": 3514 + }, + { + "loss": 0.0482, + "grad_norm": 1.5494464635849, + "learning_rate": 2.435e-06, + "num_tokens": 1204807.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.7574999999999998, + "step": 3515 + }, + { + "loss": 0.0413, + "grad_norm": 1.0267417430877686, + "learning_rate": 2.43e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.758, + "step": 3516 + }, + { + "loss": 0.0529, + "grad_norm": 1.3826123476028442, + "learning_rate": 2.425e-06, + "num_tokens": 1205831.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7585, + "step": 3517 + }, + { + "loss": 0.0622, + "grad_norm": 1.3799962997436523, + "learning_rate": 2.42e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.759, + "step": 3518 + }, + { + "loss": 0.0026, + "grad_norm": 0.36601629853248596, + "learning_rate": 2.415e-06, + "num_tokens": 1206434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7595, + "step": 3519 + }, + { + "loss": 0.057, + "grad_norm": 1.4413540363311768, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.76, + "step": 3520 + }, + { + "loss": 0.062, + "grad_norm": 1.5269067287445068, + "learning_rate": 2.4050000000000003e-06, + "num_tokens": 1207458.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7605, + "step": 3521 + }, + { + "loss": 0.0529, + "grad_norm": 1.1583778858184814, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7610000000000001, + "step": 3522 + }, + { + "loss": 0.0629, + "grad_norm": 1.502618432044983, + "learning_rate": 2.395e-06, + "num_tokens": 1208482.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7614999999999998, + "step": 3523 + }, + { + "loss": 0.0556, + "grad_norm": 1.4562733173370361, + "learning_rate": 2.39e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.762, + "step": 3524 + }, + { + "loss": 0.0028, + "grad_norm": 0.4034802317619324, + "learning_rate": 2.385e-06, + "num_tokens": 1209085.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7625, + "step": 3525 + }, + { + "loss": 0.0501, + "grad_norm": 1.3905121088027954, + "learning_rate": 2.38e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.763, + "step": 3526 + }, + { + "loss": 0.0628, + "grad_norm": 1.1878178119659424, + "learning_rate": 2.375e-06, + "num_tokens": 1210109.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7635, + "step": 3527 + }, + { + "loss": 0.0371, + "grad_norm": 1.1999701261520386, + "learning_rate": 2.37e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.764, + "step": 3528 + }, + { + "loss": 0.0029, + "grad_norm": 0.40889084339141846, + "learning_rate": 2.3650000000000002e-06, + "num_tokens": 1210712.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7645, + "step": 3529 + }, + { + "loss": 0.0389, + "grad_norm": 1.039504885673523, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7650000000000001, + "step": 3530 + }, + { + "loss": 0.068, + "grad_norm": 1.371443748474121, + "learning_rate": 2.355e-06, + "num_tokens": 1211736.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.7654999999999998, + "step": 3531 + }, + { + "loss": 0.0695, + "grad_norm": 1.7425730228424072, + "learning_rate": 2.35e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.766, + "step": 3532 + }, + { + "loss": 0.0523, + "grad_norm": 1.3040227890014648, + "learning_rate": 2.345e-06, + "num_tokens": 1212760.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7665, + "step": 3533 + }, + { + "loss": 0.0027, + "grad_norm": 0.3859405517578125, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 1.0, + "epoch": 1.767, + "step": 3534 + }, + { + "loss": 0.0385, + "grad_norm": 1.0744153261184692, + "learning_rate": 2.3350000000000005e-06, + "num_tokens": 1213363.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7675, + "step": 3535 + }, + { + "loss": 0.0029, + "grad_norm": 0.4078717827796936, + "learning_rate": 2.33e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 1.0, + "epoch": 1.768, + "step": 3536 + }, + { + "loss": 0.0464, + "grad_norm": 1.3526980876922607, + "learning_rate": 2.325e-06, + "num_tokens": 1213966.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7685, + "step": 3537 + }, + { + "loss": 0.0032, + "grad_norm": 0.44447413086891174, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7690000000000001, + "step": 3538 + }, + { + "loss": 0.0346, + "grad_norm": 0.9852960705757141, + "learning_rate": 2.3150000000000003e-06, + "num_tokens": 1214569.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7694999999999999, + "step": 3539 + }, + { + "loss": 0.0581, + "grad_norm": 1.1710577011108398, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.77, + "step": 3540 + }, + { + "loss": 0.003, + "grad_norm": 0.42533135414123535, + "learning_rate": 2.3050000000000004e-06, + "num_tokens": 1215172.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7705, + "step": 3541 + }, + { + "loss": 0.0373, + "grad_norm": 0.9175604581832886, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.771, + "step": 3542 + }, + { + "loss": 0.0464, + "grad_norm": 1.2586400508880615, + "learning_rate": 2.2950000000000005e-06, + "num_tokens": 1216196.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.7715, + "step": 3543 + }, + { + "loss": 0.0557, + "grad_norm": 1.3000445365905762, + "learning_rate": 2.29e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.772, + "step": 3544 + }, + { + "loss": 0.0377, + "grad_norm": 1.0466715097427368, + "learning_rate": 2.285e-06, + "num_tokens": 1217220.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7725, + "step": 3545 + }, + { + "loss": 0.003, + "grad_norm": 0.41341033577919006, + "learning_rate": 2.28e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7730000000000001, + "step": 3546 + }, + { + "loss": 0.0555, + "grad_norm": 1.2895411252975464, + "learning_rate": 2.2750000000000002e-06, + "num_tokens": 1217823.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7734999999999999, + "step": 3547 + }, + { + "loss": 0.0032, + "grad_norm": 0.4543672800064087, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 1.0, + "epoch": 1.774, + "step": 3548 + }, + { + "loss": 0.0033, + "grad_norm": 0.45242005586624146, + "learning_rate": 2.2650000000000003e-06, + "num_tokens": 1218005.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7745, + "step": 3549 + }, + { + "loss": 0.0664, + "grad_norm": 1.4492830038070679, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.775, + "step": 3550 + }, + { + "loss": 0.0621, + "grad_norm": 1.410575270652771, + "learning_rate": 2.2550000000000004e-06, + "num_tokens": 1219029.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7755, + "step": 3551 + }, + { + "loss": 0.0668, + "grad_norm": 1.4600263833999634, + "learning_rate": 2.25e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.776, + "step": 3552 + }, + { + "loss": 0.0518, + "grad_norm": 1.185958981513977, + "learning_rate": 2.245e-06, + "num_tokens": 1220053.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.7765, + "step": 3553 + }, + { + "loss": 0.0031, + "grad_norm": 0.4426004886627197, + "learning_rate": 2.24e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7770000000000001, + "step": 3554 + }, + { + "loss": 0.0391, + "grad_norm": 1.1847765445709229, + "learning_rate": 2.235e-06, + "num_tokens": 1220656.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.7774999999999999, + "step": 3555 + }, + { + "loss": 0.0387, + "grad_norm": 1.1244046688079834, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.778, + "step": 3556 + }, + { + "loss": 0.0639, + "grad_norm": 1.5144935846328735, + "learning_rate": 2.2250000000000003e-06, + "num_tokens": 1221680.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7785, + "step": 3557 + }, + { + "loss": 0.0504, + "grad_norm": 1.1694223880767822, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.779, + "step": 3558 + }, + { + "loss": 0.039, + "grad_norm": 1.198093295097351, + "learning_rate": 2.2150000000000004e-06, + "num_tokens": 1222704.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.7795, + "step": 3559 + }, + { + "loss": 0.0556, + "grad_norm": 1.4882034063339233, + "learning_rate": 2.21e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.78, + "step": 3560 + }, + { + "loss": 0.0033, + "grad_norm": 0.4605433940887451, + "learning_rate": 2.205e-06, + "num_tokens": 1223307.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7805, + "step": 3561 + }, + { + "loss": 0.0427, + "grad_norm": 1.400830864906311, + "learning_rate": 2.2e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7810000000000001, + "step": 3562 + }, + { + "loss": 0.0596, + "grad_norm": 1.4765678644180298, + "learning_rate": 2.195e-06, + "num_tokens": 1224331.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.7814999999999999, + "step": 3563 + }, + { + "loss": 0.0029, + "grad_norm": 0.4184083044528961, + "learning_rate": 2.19e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 1.0, + "epoch": 1.782, + "step": 3564 + }, + { + "loss": 0.0031, + "grad_norm": 0.4302586615085602, + "learning_rate": 2.1850000000000003e-06, + "num_tokens": 1224513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7825, + "step": 3565 + }, + { + "loss": 0.0031, + "grad_norm": 0.4298599362373352, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 1.0, + "epoch": 1.783, + "step": 3566 + }, + { + "loss": 0.065, + "grad_norm": 1.424648642539978, + "learning_rate": 2.1750000000000004e-06, + "num_tokens": 1225116.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7835, + "step": 3567 + }, + { + "loss": 0.0031, + "grad_norm": 0.4238447844982147, + "learning_rate": 2.17e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 1.0, + "epoch": 1.784, + "step": 3568 + }, + { + "loss": 0.0031, + "grad_norm": 0.4220222532749176, + "learning_rate": 2.165e-06, + "num_tokens": 1225298.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7845, + "step": 3569 + }, + { + "loss": 0.003, + "grad_norm": 0.42732101678848267, + "learning_rate": 2.16e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7850000000000001, + "step": 3570 + }, + { + "loss": 0.0346, + "grad_norm": 1.0672036409378052, + "learning_rate": 2.155e-06, + "num_tokens": 1225901.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.7854999999999999, + "step": 3571 + }, + { + "loss": 0.0424, + "grad_norm": 1.0617742538452148, + "learning_rate": 2.15e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.786, + "step": 3572 + }, + { + "loss": 0.0592, + "grad_norm": 1.3852803707122803, + "learning_rate": 2.1450000000000002e-06, + "num_tokens": 1226925.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7865, + "step": 3573 + }, + { + "loss": 0.0029, + "grad_norm": 0.4290924072265625, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 1.0, + "epoch": 1.787, + "step": 3574 + }, + { + "loss": 0.051, + "grad_norm": 1.1031818389892578, + "learning_rate": 2.1350000000000003e-06, + "num_tokens": 1227528.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.7875, + "step": 3575 + }, + { + "loss": 0.0393, + "grad_norm": 1.184659719467163, + "learning_rate": 2.13e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.788, + "step": 3576 + }, + { + "loss": 0.0755, + "grad_norm": 1.9755206108093262, + "learning_rate": 2.125e-06, + "num_tokens": 1228552.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.7885, + "step": 3577 + }, + { + "loss": 0.071, + "grad_norm": 1.4741475582122803, + "learning_rate": 2.12e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 3578 + }, + { + "loss": 0.0609, + "grad_norm": 1.6418182849884033, + "learning_rate": 2.115e-06, + "num_tokens": 1229576.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7894999999999999, + "step": 3579 + }, + { + "loss": 0.0027, + "grad_norm": 0.40381157398223877, + "learning_rate": 2.11e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.79, + "step": 3580 + }, + { + "loss": 0.0551, + "grad_norm": 1.2949596643447876, + "learning_rate": 2.105e-06, + "num_tokens": 1230179.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7905, + "step": 3581 + }, + { + "loss": 0.0504, + "grad_norm": 1.073058843612671, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.791, + "step": 3582 + }, + { + "loss": 0.0028, + "grad_norm": 0.3910202980041504, + "learning_rate": 2.0950000000000003e-06, + "num_tokens": 1230782.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7915, + "step": 3583 + }, + { + "loss": 0.0029, + "grad_norm": 0.40099310874938965, + "learning_rate": 2.09e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.792, + "step": 3584 + }, + { + "loss": 0.0686, + "grad_norm": 1.5408157110214233, + "learning_rate": 2.085e-06, + "num_tokens": 1231385.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7925, + "step": 3585 + }, + { + "loss": 0.0547, + "grad_norm": 1.2888717651367188, + "learning_rate": 2.08e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 3586 + }, + { + "loss": 0.0392, + "grad_norm": 1.1414070129394531, + "learning_rate": 2.075e-06, + "num_tokens": 1232409.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.7934999999999999, + "step": 3587 + }, + { + "loss": 0.0567, + "grad_norm": 1.2421129941940308, + "learning_rate": 2.07e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.794, + "step": 3588 + }, + { + "loss": 0.0567, + "grad_norm": 1.2121027708053589, + "learning_rate": 2.065e-06, + "num_tokens": 1233433.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7945, + "step": 3589 + }, + { + "loss": 0.0028, + "grad_norm": 0.4114837944507599, + "learning_rate": 2.06e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.795, + "step": 3590 + }, + { + "loss": 0.003, + "grad_norm": 0.4205188453197479, + "learning_rate": 2.0550000000000002e-06, + "num_tokens": 1233615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7955, + "step": 3591 + }, + { + "loss": 0.0029, + "grad_norm": 0.39967694878578186, + "learning_rate": 2.05e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 1.0, + "epoch": 1.796, + "step": 3592 + }, + { + "loss": 0.056, + "grad_norm": 1.251736044883728, + "learning_rate": 2.045e-06, + "num_tokens": 1234218.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7965, + "step": 3593 + }, + { + "loss": 0.0028, + "grad_norm": 0.3914256989955902, + "learning_rate": 2.04e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7970000000000002, + "step": 3594 + }, + { + "loss": 0.0604, + "grad_norm": 1.1881632804870605, + "learning_rate": 2.035e-06, + "num_tokens": 1234821.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.7974999999999999, + "step": 3595 + }, + { + "loss": 0.0622, + "grad_norm": 1.149919033050537, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.798, + "step": 3596 + }, + { + "loss": 0.0549, + "grad_norm": 1.0469919443130493, + "learning_rate": 2.025e-06, + "num_tokens": 1235845.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7985, + "step": 3597 + }, + { + "loss": 0.0535, + "grad_norm": 1.3651666641235352, + "learning_rate": 2.02e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.799, + "step": 3598 + }, + { + "loss": 0.0026, + "grad_norm": 0.37465357780456543, + "learning_rate": 2.015e-06, + "num_tokens": 1236448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7995, + "step": 3599 + }, + { + "loss": 0.0365, + "grad_norm": 1.0199239253997803, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8, + "step": 3600 + }, + { + "loss": 0.0617, + "grad_norm": 1.1323697566986084, + "learning_rate": 2.0050000000000003e-06, + "num_tokens": 1237472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8005, + "step": 3601 + }, + { + "loss": 0.003, + "grad_norm": 0.4225693345069885, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8010000000000002, + "step": 3602 + }, + { + "loss": 0.0379, + "grad_norm": 1.1038097143173218, + "learning_rate": 1.9950000000000004e-06, + "num_tokens": 1238075.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8014999999999999, + "step": 3603 + }, + { + "loss": 0.003, + "grad_norm": 0.4044983685016632, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.802, + "step": 3604 + }, + { + "loss": 0.0655, + "grad_norm": 1.8133554458618164, + "learning_rate": 1.985e-06, + "num_tokens": 1238678.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8025, + "step": 3605 + }, + { + "loss": 0.0028, + "grad_norm": 0.39725902676582336, + "learning_rate": 1.98e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 1.0, + "epoch": 1.803, + "step": 3606 + }, + { + "loss": 0.003, + "grad_norm": 0.4250074028968811, + "learning_rate": 1.975e-06, + "num_tokens": 1238860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8035, + "step": 3607 + }, + { + "loss": 0.0378, + "grad_norm": 1.14003586769104, + "learning_rate": 1.97e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.804, + "step": 3608 + }, + { + "loss": 0.0028, + "grad_norm": 0.39355626702308655, + "learning_rate": 1.9650000000000002e-06, + "num_tokens": 1239463.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8045, + "step": 3609 + }, + { + "loss": 0.0378, + "grad_norm": 1.2409162521362305, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8050000000000002, + "step": 3610 + }, + { + "loss": 0.0448, + "grad_norm": 1.4544258117675781, + "learning_rate": 1.9550000000000003e-06, + "num_tokens": 1240487.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8054999999999999, + "step": 3611 + }, + { + "loss": 0.0027, + "grad_norm": 0.3753180205821991, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.806, + "step": 3612 + }, + { + "loss": 0.0029, + "grad_norm": 0.4058220088481903, + "learning_rate": 1.945e-06, + "num_tokens": 1240669.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8065, + "step": 3613 + }, + { + "loss": 0.0574, + "grad_norm": 1.4277732372283936, + "learning_rate": 1.94e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.807, + "step": 3614 + }, + { + "loss": 0.0645, + "grad_norm": 1.5439943075180054, + "learning_rate": 1.935e-06, + "num_tokens": 1241693.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8075, + "step": 3615 + }, + { + "loss": 0.0609, + "grad_norm": 1.4575119018554688, + "learning_rate": 1.93e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.808, + "step": 3616 + }, + { + "loss": 0.0024, + "grad_norm": 0.33791404962539673, + "learning_rate": 1.925e-06, + "num_tokens": 1242296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8085, + "step": 3617 + }, + { + "loss": 0.0392, + "grad_norm": 0.994301974773407, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8090000000000002, + "step": 3618 + }, + { + "loss": 0.0026, + "grad_norm": 0.35725516080856323, + "learning_rate": 1.9150000000000003e-06, + "num_tokens": 1242899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8094999999999999, + "step": 3619 + }, + { + "loss": 0.1147, + "grad_norm": 2.219489097595215, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.81, + "step": 3620 + }, + { + "loss": 0.0025, + "grad_norm": 0.358549028635025, + "learning_rate": 1.9050000000000002e-06, + "num_tokens": 1243502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8105, + "step": 3621 + }, + { + "loss": 0.0497, + "grad_norm": 1.0606470108032227, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.811, + "step": 3622 + }, + { + "loss": 0.0354, + "grad_norm": 1.1863391399383545, + "learning_rate": 1.895e-06, + "num_tokens": 1244526.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8115, + "step": 3623 + }, + { + "loss": 0.0617, + "grad_norm": 1.461073398590088, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.812, + "step": 3624 + }, + { + "loss": 0.0522, + "grad_norm": 1.180123209953308, + "learning_rate": 1.8850000000000002e-06, + "num_tokens": 1245550.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8125, + "step": 3625 + }, + { + "loss": 0.0513, + "grad_norm": 1.1050792932510376, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.813, + "step": 3626 + }, + { + "loss": 0.0382, + "grad_norm": 1.1048370599746704, + "learning_rate": 1.8750000000000003e-06, + "num_tokens": 1246574.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8135, + "step": 3627 + }, + { + "loss": 0.0594, + "grad_norm": 1.5278170108795166, + "learning_rate": 1.87e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.814, + "step": 3628 + }, + { + "loss": 0.0026, + "grad_norm": 0.3680756688117981, + "learning_rate": 1.8650000000000001e-06, + "num_tokens": 1247177.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8145, + "step": 3629 + }, + { + "loss": 0.0025, + "grad_norm": 0.3478946387767792, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 3630 + }, + { + "loss": 0.0602, + "grad_norm": 1.2490179538726807, + "learning_rate": 1.8550000000000002e-06, + "num_tokens": 1247780.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8155000000000001, + "step": 3631 + }, + { + "loss": 0.0751, + "grad_norm": 1.6024861335754395, + "learning_rate": 1.85e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8159999999999998, + "step": 3632 + }, + { + "loss": 0.055, + "grad_norm": 1.4603705406188965, + "learning_rate": 1.8450000000000001e-06, + "num_tokens": 1248804.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8165, + "step": 3633 + }, + { + "loss": 0.0025, + "grad_norm": 0.37733298540115356, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 1.0, + "epoch": 1.817, + "step": 3634 + }, + { + "loss": 0.0028, + "grad_norm": 0.3999163806438446, + "learning_rate": 1.8350000000000002e-06, + "num_tokens": 1248986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8175, + "step": 3635 + }, + { + "loss": 0.0027, + "grad_norm": 0.39710038900375366, + "learning_rate": 1.83e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.818, + "step": 3636 + }, + { + "loss": 0.0028, + "grad_norm": 0.39646029472351074, + "learning_rate": 1.825e-06, + "num_tokens": 1249168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8185, + "step": 3637 + }, + { + "loss": 0.0426, + "grad_norm": 1.3070132732391357, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.819, + "step": 3638 + }, + { + "loss": 0.039, + "grad_norm": 1.1619224548339844, + "learning_rate": 1.8150000000000002e-06, + "num_tokens": 1250192.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8195000000000001, + "step": 3639 + }, + { + "loss": 0.0367, + "grad_norm": 1.1559624671936035, + "learning_rate": 1.81e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8199999999999998, + "step": 3640 + }, + { + "loss": 0.053, + "grad_norm": 1.3208280801773071, + "learning_rate": 1.805e-06, + "num_tokens": 1251216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8205, + "step": 3641 + }, + { + "loss": 0.0544, + "grad_norm": 1.2948426008224487, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.821, + "step": 3642 + }, + { + "loss": 0.049, + "grad_norm": 1.0491054058074951, + "learning_rate": 1.7950000000000002e-06, + "num_tokens": 1252240.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8215, + "step": 3643 + }, + { + "loss": 0.037, + "grad_norm": 1.3279922008514404, + "learning_rate": 1.79e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.822, + "step": 3644 + }, + { + "loss": 0.0027, + "grad_norm": 0.38797032833099365, + "learning_rate": 1.785e-06, + "num_tokens": 1252843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8225, + "step": 3645 + }, + { + "loss": 0.0526, + "grad_norm": 1.3761346340179443, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.823, + "step": 3646 + }, + { + "loss": 0.0594, + "grad_norm": 1.5943882465362549, + "learning_rate": 1.7750000000000002e-06, + "num_tokens": 1253867.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8235000000000001, + "step": 3647 + }, + { + "loss": 0.0386, + "grad_norm": 1.1582005023956299, + "learning_rate": 1.77e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8239999999999998, + "step": 3648 + }, + { + "loss": 0.0625, + "grad_norm": 1.422128438949585, + "learning_rate": 1.765e-06, + "num_tokens": 1254891.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8245, + "step": 3649 + }, + { + "loss": 0.0027, + "grad_norm": 0.3794823884963989, + "learning_rate": 1.76e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.825, + "step": 3650 + }, + { + "loss": 0.0377, + "grad_norm": 1.0281649827957153, + "learning_rate": 1.7550000000000001e-06, + "num_tokens": 1255494.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8255, + "step": 3651 + }, + { + "loss": 0.057, + "grad_norm": 1.2542749643325806, + "learning_rate": 1.75e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.826, + "step": 3652 + }, + { + "loss": 0.0027, + "grad_norm": 0.3857089579105377, + "learning_rate": 1.745e-06, + "num_tokens": 1256097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8265, + "step": 3653 + }, + { + "loss": 0.0529, + "grad_norm": 1.148740291595459, + "learning_rate": 1.74e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.827, + "step": 3654 + }, + { + "loss": 0.003, + "grad_norm": 0.4200035333633423, + "learning_rate": 1.7350000000000001e-06, + "num_tokens": 1256700.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8275000000000001, + "step": 3655 + }, + { + "loss": 0.0028, + "grad_norm": 0.3945881426334381, + "learning_rate": 1.73e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 3656 + }, + { + "loss": 0.039, + "grad_norm": 0.9618701934814453, + "learning_rate": 1.725e-06, + "num_tokens": 1257303.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8285, + "step": 3657 + }, + { + "loss": 0.0399, + "grad_norm": 1.2282723188400269, + "learning_rate": 1.72e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.829, + "step": 3658 + }, + { + "loss": 0.0509, + "grad_norm": 1.175613284111023, + "learning_rate": 1.7150000000000003e-06, + "num_tokens": 1258327.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8295, + "step": 3659 + }, + { + "loss": 0.0378, + "grad_norm": 1.1486104726791382, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.83, + "step": 3660 + }, + { + "loss": 0.0589, + "grad_norm": 1.3274273872375488, + "learning_rate": 1.7050000000000002e-06, + "num_tokens": 1259351.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8305, + "step": 3661 + }, + { + "loss": 0.046, + "grad_norm": 1.3887542486190796, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.831, + "step": 3662 + }, + { + "loss": 0.0029, + "grad_norm": 0.39590317010879517, + "learning_rate": 1.6950000000000003e-06, + "num_tokens": 1259954.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8315000000000001, + "step": 3663 + }, + { + "loss": 0.0369, + "grad_norm": 1.080889105796814, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.8319999999999999, + "step": 3664 + }, + { + "loss": 0.0535, + "grad_norm": 1.3136940002441406, + "learning_rate": 1.6850000000000002e-06, + "num_tokens": 1260978.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8325, + "step": 3665 + }, + { + "loss": 0.059, + "grad_norm": 1.5410752296447754, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 3666 + }, + { + "loss": 0.0029, + "grad_norm": 0.3952591121196747, + "learning_rate": 1.6750000000000003e-06, + "num_tokens": 1261581.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8335, + "step": 3667 + }, + { + "loss": 0.0518, + "grad_norm": 1.3276718854904175, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.834, + "step": 3668 + }, + { + "loss": 0.003, + "grad_norm": 0.4232414960861206, + "learning_rate": 1.6650000000000002e-06, + "num_tokens": 1262184.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8345, + "step": 3669 + }, + { + "loss": 0.0639, + "grad_norm": 1.2759331464767456, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.835, + "step": 3670 + }, + { + "loss": 0.0571, + "grad_norm": 1.5148133039474487, + "learning_rate": 1.6550000000000002e-06, + "num_tokens": 1263208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8355000000000001, + "step": 3671 + }, + { + "loss": 0.0637, + "grad_norm": 1.4910366535186768, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8359999999999999, + "step": 3672 + }, + { + "loss": 0.0029, + "grad_norm": 0.4135521948337555, + "learning_rate": 1.6450000000000001e-06, + "num_tokens": 1263811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8365, + "step": 3673 + }, + { + "loss": 0.0511, + "grad_norm": 1.2618604898452759, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.837, + "step": 3674 + }, + { + "loss": 0.0501, + "grad_norm": 1.1598845720291138, + "learning_rate": 1.6350000000000002e-06, + "num_tokens": 1264835.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8375, + "step": 3675 + }, + { + "loss": 0.0445, + "grad_norm": 1.0752735137939453, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.838, + "step": 3676 + }, + { + "loss": 0.003, + "grad_norm": 0.42967167496681213, + "learning_rate": 1.6250000000000001e-06, + "num_tokens": 1265438.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8385, + "step": 3677 + }, + { + "loss": 0.003, + "grad_norm": 0.41333630681037903, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 3678 + }, + { + "loss": 0.0033, + "grad_norm": 0.4601726531982422, + "learning_rate": 1.6150000000000002e-06, + "num_tokens": 1265620.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8395000000000001, + "step": 3679 + }, + { + "loss": 0.0648, + "grad_norm": 1.4645088911056519, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8399999999999999, + "step": 3680 + }, + { + "loss": 0.0371, + "grad_norm": 1.0282845497131348, + "learning_rate": 1.605e-06, + "num_tokens": 1266644.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8405, + "step": 3681 + }, + { + "loss": 0.0034, + "grad_norm": 0.4804507791996002, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 1.0, + "epoch": 1.841, + "step": 3682 + }, + { + "loss": 0.0611, + "grad_norm": 1.6006290912628174, + "learning_rate": 1.5950000000000002e-06, + "num_tokens": 1267247.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8415, + "step": 3683 + }, + { + "loss": 0.0032, + "grad_norm": 0.4456159472465515, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 1.0, + "epoch": 1.842, + "step": 3684 + }, + { + "loss": 0.0028, + "grad_norm": 0.39536213874816895, + "learning_rate": 1.585e-06, + "num_tokens": 1267429.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8425, + "step": 3685 + }, + { + "loss": 0.0441, + "grad_norm": 1.2790175676345825, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.843, + "step": 3686 + }, + { + "loss": 0.0545, + "grad_norm": 1.1657609939575195, + "learning_rate": 1.5750000000000002e-06, + "num_tokens": 1268453.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8435000000000001, + "step": 3687 + }, + { + "loss": 0.0536, + "grad_norm": 1.0926413536071777, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8439999999999999, + "step": 3688 + }, + { + "loss": 0.0362, + "grad_norm": 0.9912558197975159, + "learning_rate": 1.565e-06, + "num_tokens": 1269477.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8445, + "step": 3689 + }, + { + "loss": 0.0374, + "grad_norm": 1.0493851900100708, + "learning_rate": 1.56e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.845, + "step": 3690 + }, + { + "loss": 0.0028, + "grad_norm": 0.4059640169143677, + "learning_rate": 1.5550000000000001e-06, + "num_tokens": 1270080.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8455, + "step": 3691 + }, + { + "loss": 0.003, + "grad_norm": 0.4232662618160248, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 1.0, + "epoch": 1.846, + "step": 3692 + }, + { + "loss": 0.0031, + "grad_norm": 0.43225178122520447, + "learning_rate": 1.545e-06, + "num_tokens": 1270262.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8465, + "step": 3693 + }, + { + "loss": 0.0027, + "grad_norm": 0.3701487183570862, + "learning_rate": 1.54e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 3694 + }, + { + "loss": 0.0545, + "grad_norm": 1.3909512758255005, + "learning_rate": 1.5350000000000001e-06, + "num_tokens": 1270865.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8475000000000001, + "step": 3695 + }, + { + "loss": 0.0027, + "grad_norm": 0.38712078332901, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8479999999999999, + "step": 3696 + }, + { + "loss": 0.0506, + "grad_norm": 1.0741735696792603, + "learning_rate": 1.525e-06, + "num_tokens": 1271468.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8485, + "step": 3697 + }, + { + "loss": 0.0693, + "grad_norm": 1.657240629196167, + "learning_rate": 1.52e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.849, + "step": 3698 + }, + { + "loss": 0.0025, + "grad_norm": 0.3615441918373108, + "learning_rate": 1.5150000000000001e-06, + "num_tokens": 1272071.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8495, + "step": 3699 + }, + { + "loss": 0.0355, + "grad_norm": 0.9562244415283203, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.85, + "step": 3700 + }, + { + "loss": 0.0026, + "grad_norm": 0.36725983023643494, + "learning_rate": 1.505e-06, + "num_tokens": 1272674.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8505, + "step": 3701 + }, + { + "loss": 0.0028, + "grad_norm": 0.3878721296787262, + "learning_rate": 1.5e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 3702 + }, + { + "loss": 0.0359, + "grad_norm": 1.0378117561340332, + "learning_rate": 1.495e-06, + "num_tokens": 1273277.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8515000000000001, + "step": 3703 + }, + { + "loss": 0.0656, + "grad_norm": 1.2746002674102783, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8519999999999999, + "step": 3704 + }, + { + "loss": 0.0026, + "grad_norm": 0.35767146944999695, + "learning_rate": 1.485e-06, + "num_tokens": 1273880.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8525, + "step": 3705 + }, + { + "loss": 0.0026, + "grad_norm": 0.36552944779396057, + "learning_rate": 1.48e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 3706 + }, + { + "loss": 0.0473, + "grad_norm": 1.1046762466430664, + "learning_rate": 1.475e-06, + "num_tokens": 1274483.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8535, + "step": 3707 + }, + { + "loss": 0.0625, + "grad_norm": 1.4509928226470947, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.854, + "step": 3708 + }, + { + "loss": 0.0421, + "grad_norm": 1.1400452852249146, + "learning_rate": 1.465e-06, + "num_tokens": 1275507.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8545, + "step": 3709 + }, + { + "loss": 0.0026, + "grad_norm": 0.3619054853916168, + "learning_rate": 1.46e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 1.0, + "epoch": 1.855, + "step": 3710 + }, + { + "loss": 0.0026, + "grad_norm": 0.3667825162410736, + "learning_rate": 1.455e-06, + "num_tokens": 1275689.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8555000000000001, + "step": 3711 + }, + { + "loss": 0.0466, + "grad_norm": 1.255405068397522, + "learning_rate": 1.45e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8559999999999999, + "step": 3712 + }, + { + "loss": 0.0657, + "grad_norm": 1.4270333051681519, + "learning_rate": 1.445e-06, + "num_tokens": 1276713.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8565, + "step": 3713 + }, + { + "loss": 0.0356, + "grad_norm": 1.035252571105957, + "learning_rate": 1.44e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.857, + "step": 3714 + }, + { + "loss": 0.0024, + "grad_norm": 0.34851282835006714, + "learning_rate": 1.435e-06, + "num_tokens": 1277316.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8575, + "step": 3715 + }, + { + "loss": 0.0669, + "grad_norm": 1.6207127571105957, + "learning_rate": 1.43e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.858, + "step": 3716 + }, + { + "loss": 0.0025, + "grad_norm": 0.34068116545677185, + "learning_rate": 1.425e-06, + "num_tokens": 1277919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8585, + "step": 3717 + }, + { + "loss": 0.0023, + "grad_norm": 0.3336624801158905, + "learning_rate": 1.42e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 3718 + }, + { + "loss": 0.0663, + "grad_norm": 1.4342654943466187, + "learning_rate": 1.415e-06, + "num_tokens": 1278522.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8595000000000002, + "step": 3719 + }, + { + "loss": 0.0506, + "grad_norm": 1.1730687618255615, + "learning_rate": 1.41e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8599999999999999, + "step": 3720 + }, + { + "loss": 0.062, + "grad_norm": 1.4714523553848267, + "learning_rate": 1.4050000000000003e-06, + "num_tokens": 1279546.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8605, + "step": 3721 + }, + { + "loss": 0.0514, + "grad_norm": 1.2004119157791138, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.861, + "step": 3722 + }, + { + "loss": 0.0023, + "grad_norm": 0.3368993103504181, + "learning_rate": 1.3950000000000002e-06, + "num_tokens": 1280149.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8615, + "step": 3723 + }, + { + "loss": 0.0025, + "grad_norm": 0.3626645803451538, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 3724 + }, + { + "loss": 0.0379, + "grad_norm": 1.129130482673645, + "learning_rate": 1.3850000000000003e-06, + "num_tokens": 1280752.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8625, + "step": 3725 + }, + { + "loss": 0.0026, + "grad_norm": 0.35549208521842957, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 1.0, + "epoch": 1.863, + "step": 3726 + }, + { + "loss": 0.039, + "grad_norm": 1.0426714420318604, + "learning_rate": 1.3750000000000002e-06, + "num_tokens": 1281355.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8635000000000002, + "step": 3727 + }, + { + "loss": 0.0591, + "grad_norm": 1.4238243103027344, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8639999999999999, + "step": 3728 + }, + { + "loss": 0.0587, + "grad_norm": 1.182423710823059, + "learning_rate": 1.3650000000000003e-06, + "num_tokens": 1282379.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8645, + "step": 3729 + }, + { + "loss": 0.0344, + "grad_norm": 1.0535178184509277, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9882583022117615, + "epoch": 1.865, + "step": 3730 + }, + { + "loss": 0.0024, + "grad_norm": 0.34818780422210693, + "learning_rate": 1.3550000000000002e-06, + "num_tokens": 1282982.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8655, + "step": 3731 + }, + { + "loss": 0.0652, + "grad_norm": 1.3155183792114258, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.866, + "step": 3732 + }, + { + "loss": 0.0543, + "grad_norm": 1.2466151714324951, + "learning_rate": 1.3450000000000003e-06, + "num_tokens": 1284006.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8665, + "step": 3733 + }, + { + "loss": 0.0366, + "grad_norm": 1.1111284494400024, + "learning_rate": 1.34e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.867, + "step": 3734 + }, + { + "loss": 0.036, + "grad_norm": 1.2413430213928223, + "learning_rate": 1.3350000000000001e-06, + "num_tokens": 1285030.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8675000000000002, + "step": 3735 + }, + { + "loss": 0.0503, + "grad_norm": 1.2572247982025146, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8679999999999999, + "step": 3736 + }, + { + "loss": 0.0634, + "grad_norm": 1.3656840324401855, + "learning_rate": 1.3250000000000002e-06, + "num_tokens": 1286054.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8685, + "step": 3737 + }, + { + "loss": 0.0369, + "grad_norm": 1.1938374042510986, + "learning_rate": 1.32e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.869, + "step": 3738 + }, + { + "loss": 0.0619, + "grad_norm": 1.5963718891143799, + "learning_rate": 1.3150000000000001e-06, + "num_tokens": 1287078.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8695, + "step": 3739 + }, + { + "loss": 0.0569, + "grad_norm": 1.3680788278579712, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 3740 + }, + { + "loss": 0.0535, + "grad_norm": 1.175209879875183, + "learning_rate": 1.3050000000000002e-06, + "num_tokens": 1288102.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8705, + "step": 3741 + }, + { + "loss": 0.0026, + "grad_norm": 0.3611868619918823, + "learning_rate": 1.3e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 1.0, + "epoch": 1.871, + "step": 3742 + }, + { + "loss": 0.0377, + "grad_norm": 1.2314857244491577, + "learning_rate": 1.295e-06, + "num_tokens": 1288705.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8715000000000002, + "step": 3743 + }, + { + "loss": 0.0511, + "grad_norm": 1.4128717184066772, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8719999999999999, + "step": 3744 + }, + { + "loss": 0.1336, + "grad_norm": 2.185844659805298, + "learning_rate": 1.2850000000000002e-06, + "num_tokens": 1289729.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.8725, + "step": 3745 + }, + { + "loss": 0.0025, + "grad_norm": 0.33957669138908386, + "learning_rate": 1.28e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 1.0, + "epoch": 1.873, + "step": 3746 + }, + { + "loss": 0.0027, + "grad_norm": 0.3769534230232239, + "learning_rate": 1.275e-06, + "num_tokens": 1289911.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8735, + "step": 3747 + }, + { + "loss": 0.0584, + "grad_norm": 1.4691829681396484, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.874, + "step": 3748 + }, + { + "loss": 0.0635, + "grad_norm": 1.6226807832717896, + "learning_rate": 1.2650000000000002e-06, + "num_tokens": 1290935.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8745, + "step": 3749 + }, + { + "loss": 0.0033, + "grad_norm": 0.4503451883792877, + "learning_rate": 1.26e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 1.0, + "epoch": 1.875, + "step": 3750 + }, + { + "loss": 0.0028, + "grad_norm": 0.39449983835220337, + "learning_rate": 1.255e-06, + "num_tokens": 1291117.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8755, + "step": 3751 + }, + { + "loss": 0.0029, + "grad_norm": 0.4101957678794861, + "learning_rate": 1.25e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 3752 + }, + { + "loss": 0.0359, + "grad_norm": 1.259843111038208, + "learning_rate": 1.2450000000000002e-06, + "num_tokens": 1291720.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8765, + "step": 3753 + }, + { + "loss": 0.0027, + "grad_norm": 0.372577965259552, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 1.0, + "epoch": 1.877, + "step": 3754 + }, + { + "loss": 0.0596, + "grad_norm": 1.1994444131851196, + "learning_rate": 1.235e-06, + "num_tokens": 1292323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8775, + "step": 3755 + }, + { + "loss": 0.0703, + "grad_norm": 1.5322065353393555, + "learning_rate": 1.23e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.8780000000000001, + "step": 3756 + }, + { + "loss": 0.0643, + "grad_norm": 1.7045296430587769, + "learning_rate": 1.2250000000000001e-06, + "num_tokens": 1293347.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8784999999999998, + "step": 3757 + }, + { + "loss": 0.0439, + "grad_norm": 1.2476153373718262, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.879, + "step": 3758 + }, + { + "loss": 0.0402, + "grad_norm": 1.186736822128296, + "learning_rate": 1.215e-06, + "num_tokens": 1294371.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8795, + "step": 3759 + }, + { + "loss": 0.0029, + "grad_norm": 0.39700445532798767, + "learning_rate": 1.21e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 1.0, + "epoch": 1.88, + "step": 3760 + }, + { + "loss": 0.1202, + "grad_norm": 3.1105434894561768, + "learning_rate": 1.2050000000000001e-06, + "num_tokens": 1294974.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.8805, + "step": 3761 + }, + { + "loss": 0.0408, + "grad_norm": 1.1640613079071045, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.881, + "step": 3762 + }, + { + "loss": 0.0023, + "grad_norm": 0.32245126366615295, + "learning_rate": 1.195e-06, + "num_tokens": 1295577.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8815, + "step": 3763 + }, + { + "loss": 0.0644, + "grad_norm": 1.4617496728897095, + "learning_rate": 1.19e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8820000000000001, + "step": 3764 + }, + { + "loss": 0.0024, + "grad_norm": 0.3409968614578247, + "learning_rate": 1.185e-06, + "num_tokens": 1296180.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8824999999999998, + "step": 3765 + }, + { + "loss": 0.0666, + "grad_norm": 2.035632848739624, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.883, + "step": 3766 + }, + { + "loss": 0.0402, + "grad_norm": 1.1498757600784302, + "learning_rate": 1.175e-06, + "num_tokens": 1297204.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8835, + "step": 3767 + }, + { + "loss": 0.0593, + "grad_norm": 1.348196268081665, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.884, + "step": 3768 + }, + { + "loss": 0.0667, + "grad_norm": 1.692858099937439, + "learning_rate": 1.165e-06, + "num_tokens": 1298228.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.8845, + "step": 3769 + }, + { + "loss": 0.0029, + "grad_norm": 0.40195682644844055, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 1.0, + "epoch": 1.885, + "step": 3770 + }, + { + "loss": 0.0515, + "grad_norm": 1.0095990896224976, + "learning_rate": 1.1550000000000002e-06, + "num_tokens": 1298831.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8855, + "step": 3771 + }, + { + "loss": 0.0411, + "grad_norm": 1.4529675245285034, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8860000000000001, + "step": 3772 + }, + { + "loss": 0.0029, + "grad_norm": 0.39934462308883667, + "learning_rate": 1.145e-06, + "num_tokens": 1299434.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8864999999999998, + "step": 3773 + }, + { + "loss": 0.0026, + "grad_norm": 0.37341752648353577, + "learning_rate": 1.14e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 3774 + }, + { + "loss": 0.003, + "grad_norm": 0.427602082490921, + "learning_rate": 1.1350000000000001e-06, + "num_tokens": 1299616.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8875, + "step": 3775 + }, + { + "loss": 0.0027, + "grad_norm": 0.38110828399658203, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 3776 + }, + { + "loss": 0.05, + "grad_norm": 1.3058017492294312, + "learning_rate": 1.125e-06, + "num_tokens": 1300219.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8885, + "step": 3777 + }, + { + "loss": 0.0551, + "grad_norm": 1.049538016319275, + "learning_rate": 1.12e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.889, + "step": 3778 + }, + { + "loss": 0.0543, + "grad_norm": 1.1460436582565308, + "learning_rate": 1.1150000000000001e-06, + "num_tokens": 1301243.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8895, + "step": 3779 + }, + { + "loss": 0.0402, + "grad_norm": 1.1601300239562988, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.8900000000000001, + "step": 3780 + }, + { + "loss": 0.0571, + "grad_norm": 1.1402069330215454, + "learning_rate": 1.105e-06, + "num_tokens": 1302267.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8904999999999998, + "step": 3781 + }, + { + "loss": 0.0381, + "grad_norm": 1.2498735189437866, + "learning_rate": 1.1e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.891, + "step": 3782 + }, + { + "loss": 0.0658, + "grad_norm": 1.471903920173645, + "learning_rate": 1.095e-06, + "num_tokens": 1303291.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8915, + "step": 3783 + }, + { + "loss": 0.003, + "grad_norm": 0.40989261865615845, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 1.0, + "epoch": 1.892, + "step": 3784 + }, + { + "loss": 0.0029, + "grad_norm": 0.4065409004688263, + "learning_rate": 1.085e-06, + "num_tokens": 1303473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8925, + "step": 3785 + }, + { + "loss": 0.0027, + "grad_norm": 0.38934385776519775, + "learning_rate": 1.08e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 3786 + }, + { + "loss": 0.0028, + "grad_norm": 0.3856496810913086, + "learning_rate": 1.075e-06, + "num_tokens": 1303655.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8935, + "step": 3787 + }, + { + "loss": 0.0422, + "grad_norm": 1.3679287433624268, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.8940000000000001, + "step": 3788 + }, + { + "loss": 0.051, + "grad_norm": 1.206390619277954, + "learning_rate": 1.065e-06, + "num_tokens": 1304679.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8944999999999999, + "step": 3789 + }, + { + "loss": 0.0029, + "grad_norm": 0.41105058789253235, + "learning_rate": 1.06e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 1.0, + "epoch": 1.895, + "step": 3790 + }, + { + "loss": 0.0027, + "grad_norm": 0.3825374245643616, + "learning_rate": 1.055e-06, + "num_tokens": 1304861.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8955, + "step": 3791 + }, + { + "loss": 0.0024, + "grad_norm": 0.3389546871185303, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 3792 + }, + { + "loss": 0.0027, + "grad_norm": 0.38113462924957275, + "learning_rate": 1.045e-06, + "num_tokens": 1305043.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8965, + "step": 3793 + }, + { + "loss": 0.0025, + "grad_norm": 0.35084959864616394, + "learning_rate": 1.04e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 3794 + }, + { + "loss": 0.056, + "grad_norm": 1.4280885457992554, + "learning_rate": 1.035e-06, + "num_tokens": 1305646.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.8975, + "step": 3795 + }, + { + "loss": 0.0584, + "grad_norm": 1.4864161014556885, + "learning_rate": 1.03e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.8980000000000001, + "step": 3796 + }, + { + "loss": 0.0023, + "grad_norm": 0.32296261191368103, + "learning_rate": 1.025e-06, + "num_tokens": 1306249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8984999999999999, + "step": 3797 + }, + { + "loss": 0.0372, + "grad_norm": 1.1412842273712158, + "learning_rate": 1.02e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.899, + "step": 3798 + }, + { + "loss": 0.036, + "grad_norm": 1.0588805675506592, + "learning_rate": 1.0150000000000002e-06, + "num_tokens": 1307273.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.8995, + "step": 3799 + }, + { + "loss": 0.0025, + "grad_norm": 0.34841030836105347, + "learning_rate": 1.01e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9, + "step": 3800 + }, + { + "loss": 0.0025, + "grad_norm": 0.3537651002407074, + "learning_rate": 1.0050000000000001e-06, + "num_tokens": 1307455.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9005, + "step": 3801 + }, + { + "loss": 0.0405, + "grad_norm": 1.1438575983047485, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.901, + "step": 3802 + }, + { + "loss": 0.0694, + "grad_norm": 1.4709012508392334, + "learning_rate": 9.950000000000002e-07, + "num_tokens": 1308479.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9015, + "step": 3803 + }, + { + "loss": 0.0023, + "grad_norm": 0.3326675593852997, + "learning_rate": 9.9e-07, + "num_tokens": 1308570.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9020000000000001, + "step": 3804 + }, + { + "loss": 0.0635, + "grad_norm": 1.4323761463165283, + "learning_rate": 9.85e-07, + "num_tokens": 1309082.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9024999999999999, + "step": 3805 + }, + { + "loss": 0.0683, + "grad_norm": 1.6102875471115112, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.903, + "step": 3806 + }, + { + "loss": 0.0022, + "grad_norm": 0.3131149709224701, + "learning_rate": 9.750000000000002e-07, + "num_tokens": 1309685.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9035, + "step": 3807 + }, + { + "loss": 0.0021, + "grad_norm": 0.30395570397377014, + "learning_rate": 9.7e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 3808 + }, + { + "loss": 0.056, + "grad_norm": 1.3097760677337646, + "learning_rate": 9.65e-07, + "num_tokens": 1310288.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9045, + "step": 3809 + }, + { + "loss": 0.0425, + "grad_norm": 1.2873075008392334, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.905, + "step": 3810 + }, + { + "loss": 0.0366, + "grad_norm": 1.1098606586456299, + "learning_rate": 9.550000000000002e-07, + "num_tokens": 1311312.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9055, + "step": 3811 + }, + { + "loss": 0.0023, + "grad_norm": 0.33073046803474426, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9060000000000001, + "step": 3812 + }, + { + "loss": 0.0558, + "grad_norm": 1.287516713142395, + "learning_rate": 9.450000000000001e-07, + "num_tokens": 1311915.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9064999999999999, + "step": 3813 + }, + { + "loss": 0.0023, + "grad_norm": 0.3197239935398102, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 1.0, + "epoch": 1.907, + "step": 3814 + }, + { + "loss": 0.0022, + "grad_norm": 0.3093603253364563, + "learning_rate": 9.35e-07, + "num_tokens": 1312097.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9075, + "step": 3815 + }, + { + "loss": 0.0027, + "grad_norm": 0.3792094588279724, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 3816 + }, + { + "loss": 0.0024, + "grad_norm": 0.33527225255966187, + "learning_rate": 9.25e-07, + "num_tokens": 1312279.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9085, + "step": 3817 + }, + { + "loss": 0.0531, + "grad_norm": 1.204848051071167, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.909, + "step": 3818 + }, + { + "loss": 0.0702, + "grad_norm": 1.3416361808776855, + "learning_rate": 9.15e-07, + "num_tokens": 1313303.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9095, + "step": 3819 + }, + { + "loss": 0.0541, + "grad_norm": 1.515673279762268, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9100000000000001, + "step": 3820 + }, + { + "loss": 0.0024, + "grad_norm": 0.33284807205200195, + "learning_rate": 9.05e-07, + "num_tokens": 1313906.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9104999999999999, + "step": 3821 + }, + { + "loss": 0.0023, + "grad_norm": 0.32082033157348633, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 3822 + }, + { + "loss": 0.056, + "grad_norm": 1.2340785264968872, + "learning_rate": 8.95e-07, + "num_tokens": 1314509.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9115, + "step": 3823 + }, + { + "loss": 0.0021, + "grad_norm": 0.3040038049221039, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 1.0, + "epoch": 1.912, + "step": 3824 + }, + { + "loss": 0.0392, + "grad_norm": 1.3959851264953613, + "learning_rate": 8.85e-07, + "num_tokens": 1315112.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9125, + "step": 3825 + }, + { + "loss": 0.0027, + "grad_norm": 0.37887290120124817, + "learning_rate": 8.8e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 1.0, + "epoch": 1.913, + "step": 3826 + }, + { + "loss": 0.0022, + "grad_norm": 0.30666735768318176, + "learning_rate": 8.75e-07, + "num_tokens": 1315294.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9135, + "step": 3827 + }, + { + "loss": 0.0691, + "grad_norm": 1.3549600839614868, + "learning_rate": 8.7e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9140000000000001, + "step": 3828 + }, + { + "loss": 0.0675, + "grad_norm": 1.2945553064346313, + "learning_rate": 8.65e-07, + "num_tokens": 1316318.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9144999999999999, + "step": 3829 + }, + { + "loss": 0.0022, + "grad_norm": 0.3147728145122528, + "learning_rate": 8.6e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 1.0, + "epoch": 1.915, + "step": 3830 + }, + { + "loss": 0.0531, + "grad_norm": 1.0365914106369019, + "learning_rate": 8.550000000000002e-07, + "num_tokens": 1316921.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9155, + "step": 3831 + }, + { + "loss": 0.0416, + "grad_norm": 1.2123857736587524, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.916, + "step": 3832 + }, + { + "loss": 0.0023, + "grad_norm": 0.3252547085285187, + "learning_rate": 8.450000000000002e-07, + "num_tokens": 1317524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9165, + "step": 3833 + }, + { + "loss": 0.0021, + "grad_norm": 0.29913613200187683, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 3834 + }, + { + "loss": 0.0688, + "grad_norm": 1.6491233110427856, + "learning_rate": 8.350000000000002e-07, + "num_tokens": 1318127.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9175, + "step": 3835 + }, + { + "loss": 0.0021, + "grad_norm": 0.3058773875236511, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9180000000000001, + "step": 3836 + }, + { + "loss": 0.038, + "grad_norm": 1.1742405891418457, + "learning_rate": 8.250000000000001e-07, + "num_tokens": 1318730.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9184999999999999, + "step": 3837 + }, + { + "loss": 0.002, + "grad_norm": 0.27437257766723633, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 1.0, + "epoch": 1.919, + "step": 3838 + }, + { + "loss": 0.0397, + "grad_norm": 1.1734699010849, + "learning_rate": 8.150000000000001e-07, + "num_tokens": 1319333.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9195, + "step": 3839 + }, + { + "loss": 0.0688, + "grad_norm": 1.6114236116409302, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.92, + "step": 3840 + }, + { + "loss": 0.0396, + "grad_norm": 1.3022080659866333, + "learning_rate": 8.050000000000001e-07, + "num_tokens": 1320357.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9205, + "step": 3841 + }, + { + "loss": 0.002, + "grad_norm": 0.2882446348667145, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 1.0, + "epoch": 1.921, + "step": 3842 + }, + { + "loss": 0.0636, + "grad_norm": 1.4788239002227783, + "learning_rate": 7.950000000000001e-07, + "num_tokens": 1320960.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9215, + "step": 3843 + }, + { + "loss": 0.0554, + "grad_norm": 1.472805142402649, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9220000000000002, + "step": 3844 + }, + { + "loss": 0.0382, + "grad_norm": 1.3122379779815674, + "learning_rate": 7.850000000000001e-07, + "num_tokens": 1321984.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9224999999999999, + "step": 3845 + }, + { + "loss": 0.0019, + "grad_norm": 0.27439191937446594, + "learning_rate": 7.8e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.923, + "step": 3846 + }, + { + "loss": 0.0021, + "grad_norm": 0.3059723973274231, + "learning_rate": 7.750000000000001e-07, + "num_tokens": 1322166.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9235, + "step": 3847 + }, + { + "loss": 0.0021, + "grad_norm": 0.3025694489479065, + "learning_rate": 7.7e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 3848 + }, + { + "loss": 0.0416, + "grad_norm": 1.4384698867797852, + "learning_rate": 7.650000000000001e-07, + "num_tokens": 1322769.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9245, + "step": 3849 + }, + { + "loss": 0.0019, + "grad_norm": 0.26954689621925354, + "learning_rate": 7.6e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 1.0, + "epoch": 1.925, + "step": 3850 + }, + { + "loss": 0.0373, + "grad_norm": 1.0434874296188354, + "learning_rate": 7.550000000000001e-07, + "num_tokens": 1323372.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9255, + "step": 3851 + }, + { + "loss": 0.0384, + "grad_norm": 1.2146815061569214, + "learning_rate": 7.5e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9260000000000002, + "step": 3852 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992803454399109, + "learning_rate": 7.450000000000001e-07, + "num_tokens": 1323975.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9264999999999999, + "step": 3853 + }, + { + "loss": 0.0683, + "grad_norm": 2.0715625286102295, + "learning_rate": 7.4e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.927, + "step": 3854 + }, + { + "loss": 0.0687, + "grad_norm": 1.7195099592208862, + "learning_rate": 7.350000000000001e-07, + "num_tokens": 1324999.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.9275, + "step": 3855 + }, + { + "loss": 0.0022, + "grad_norm": 0.31213998794555664, + "learning_rate": 7.3e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 1.0, + "epoch": 1.928, + "step": 3856 + }, + { + "loss": 0.0446, + "grad_norm": 1.5833452939987183, + "learning_rate": 7.25e-07, + "num_tokens": 1325602.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9285, + "step": 3857 + }, + { + "loss": 0.0019, + "grad_norm": 0.27154725790023804, + "learning_rate": 7.2e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.929, + "step": 3858 + }, + { + "loss": 0.0385, + "grad_norm": 1.1363227367401123, + "learning_rate": 7.15e-07, + "num_tokens": 1326205.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9295, + "step": 3859 + }, + { + "loss": 0.0021, + "grad_norm": 0.2992321252822876, + "learning_rate": 7.1e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9300000000000002, + "step": 3860 + }, + { + "loss": 0.0537, + "grad_norm": 1.2202407121658325, + "learning_rate": 7.05e-07, + "num_tokens": 1326808.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9304999999999999, + "step": 3861 + }, + { + "loss": 0.0659, + "grad_norm": 1.3972662687301636, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.931, + "step": 3862 + }, + { + "loss": 0.0022, + "grad_norm": 0.3156076967716217, + "learning_rate": 6.950000000000001e-07, + "num_tokens": 1327411.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9315, + "step": 3863 + }, + { + "loss": 0.002, + "grad_norm": 0.2746105492115021, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 3864 + }, + { + "loss": 0.0492, + "grad_norm": 1.111280083656311, + "learning_rate": 6.850000000000001e-07, + "num_tokens": 1328014.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9325, + "step": 3865 + }, + { + "loss": 0.0557, + "grad_norm": 1.1395080089569092, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.933, + "step": 3866 + }, + { + "loss": 0.041, + "grad_norm": 1.1225674152374268, + "learning_rate": 6.750000000000001e-07, + "num_tokens": 1329038.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9335, + "step": 3867 + }, + { + "loss": 0.0021, + "grad_norm": 0.2975449860095978, + "learning_rate": 6.7e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9340000000000002, + "step": 3868 + }, + { + "loss": 0.002, + "grad_norm": 0.2790532410144806, + "learning_rate": 6.650000000000001e-07, + "num_tokens": 1329220.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9344999999999999, + "step": 3869 + }, + { + "loss": 0.0019, + "grad_norm": 0.27045223116874695, + "learning_rate": 6.6e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 3870 + }, + { + "loss": 0.0587, + "grad_norm": 1.2998172044754028, + "learning_rate": 6.550000000000001e-07, + "num_tokens": 1329823.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9355, + "step": 3871 + }, + { + "loss": 0.1167, + "grad_norm": 2.1144580841064453, + "learning_rate": 6.5e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 1.936, + "step": 3872 + }, + { + "loss": 0.0021, + "grad_norm": 0.29768821597099304, + "learning_rate": 6.450000000000001e-07, + "num_tokens": 1330426.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9365, + "step": 3873 + }, + { + "loss": 0.0021, + "grad_norm": 0.3033559024333954, + "learning_rate": 6.4e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 3874 + }, + { + "loss": 0.0017, + "grad_norm": 0.2499658465385437, + "learning_rate": 6.350000000000001e-07, + "num_tokens": 1330608.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9375, + "step": 3875 + }, + { + "loss": 0.002, + "grad_norm": 0.28729239106178284, + "learning_rate": 6.3e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 3876 + }, + { + "loss": 0.0538, + "grad_norm": 1.3207937479019165, + "learning_rate": 6.25e-07, + "num_tokens": 1331211.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9385, + "step": 3877 + }, + { + "loss": 0.0022, + "grad_norm": 0.3201894760131836, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 1.0, + "epoch": 1.939, + "step": 3878 + }, + { + "loss": 0.058, + "grad_norm": 1.3156497478485107, + "learning_rate": 6.15e-07, + "num_tokens": 1331814.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9395, + "step": 3879 + }, + { + "loss": 0.0544, + "grad_norm": 1.192156195640564, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.94, + "step": 3880 + }, + { + "loss": 0.0634, + "grad_norm": 2.076542377471924, + "learning_rate": 6.05e-07, + "num_tokens": 1332838.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9405000000000001, + "step": 3881 + }, + { + "loss": 0.0488, + "grad_norm": 1.3221850395202637, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9409999999999998, + "step": 3882 + }, + { + "loss": 0.0021, + "grad_norm": 0.3004106283187866, + "learning_rate": 5.95e-07, + "num_tokens": 1333441.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9415, + "step": 3883 + }, + { + "loss": 0.0541, + "grad_norm": 1.230305790901184, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.942, + "step": 3884 + }, + { + "loss": 0.002, + "grad_norm": 0.2805992662906647, + "learning_rate": 5.850000000000001e-07, + "num_tokens": 1334044.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9425, + "step": 3885 + }, + { + "loss": 0.0019, + "grad_norm": 0.27598538994789124, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 3886 + }, + { + "loss": 0.0021, + "grad_norm": 0.3006319999694824, + "learning_rate": 5.750000000000001e-07, + "num_tokens": 1334226.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9435, + "step": 3887 + }, + { + "loss": 0.0628, + "grad_norm": 1.3234870433807373, + "learning_rate": 5.7e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.944, + "step": 3888 + }, + { + "loss": 0.0368, + "grad_norm": 0.9632979035377502, + "learning_rate": 5.650000000000001e-07, + "num_tokens": 1335250.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.9445000000000001, + "step": 3889 + }, + { + "loss": 0.0396, + "grad_norm": 1.0664863586425781, + "learning_rate": 5.6e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9449999999999998, + "step": 3890 + }, + { + "loss": 0.0361, + "grad_norm": 0.998447060585022, + "learning_rate": 5.550000000000001e-07, + "num_tokens": 1336274.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9455, + "step": 3891 + }, + { + "loss": 0.066, + "grad_norm": 1.6561861038208008, + "learning_rate": 5.5e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.946, + "step": 3892 + }, + { + "loss": 0.0564, + "grad_norm": 1.0982937812805176, + "learning_rate": 5.450000000000001e-07, + "num_tokens": 1337298.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9465, + "step": 3893 + }, + { + "loss": 0.0649, + "grad_norm": 1.3116402626037598, + "learning_rate": 5.4e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.947, + "step": 3894 + }, + { + "loss": 0.0393, + "grad_norm": 1.211995005607605, + "learning_rate": 5.350000000000001e-07, + "num_tokens": 1338322.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9475, + "step": 3895 + }, + { + "loss": 0.0656, + "grad_norm": 1.3053356409072876, + "learning_rate": 5.3e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.948, + "step": 3896 + }, + { + "loss": 0.059, + "grad_norm": 1.4926881790161133, + "learning_rate": 5.250000000000001e-07, + "num_tokens": 1339346.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9485000000000001, + "step": 3897 + }, + { + "loss": 0.0517, + "grad_norm": 1.099536657333374, + "learning_rate": 5.2e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9489999999999998, + "step": 3898 + }, + { + "loss": 0.002, + "grad_norm": 0.2851589620113373, + "learning_rate": 5.15e-07, + "num_tokens": 1339949.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9495, + "step": 3899 + }, + { + "loss": 0.002, + "grad_norm": 0.2879925072193146, + "learning_rate": 5.1e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 3900 + }, + { + "loss": 0.0557, + "grad_norm": 1.0640603303909302, + "learning_rate": 5.05e-07, + "num_tokens": 1340552.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9505, + "step": 3901 + }, + { + "loss": 0.0021, + "grad_norm": 0.3005947470664978, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 1.0, + "epoch": 1.951, + "step": 3902 + }, + { + "loss": 0.0021, + "grad_norm": 0.30592235922813416, + "learning_rate": 4.95e-07, + "num_tokens": 1340734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9515, + "step": 3903 + }, + { + "loss": 0.0508, + "grad_norm": 1.1045085191726685, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.952, + "step": 3904 + }, + { + "loss": 0.0539, + "grad_norm": 1.1382217407226562, + "learning_rate": 4.85e-07, + "num_tokens": 1341758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.9525000000000001, + "step": 3905 + }, + { + "loss": 0.0576, + "grad_norm": 1.5904083251953125, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9529999999999998, + "step": 3906 + }, + { + "loss": 0.0401, + "grad_norm": 1.0153878927230835, + "learning_rate": 4.7500000000000006e-07, + "num_tokens": 1342782.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9535, + "step": 3907 + }, + { + "loss": 0.0023, + "grad_norm": 0.32124239206314087, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 1.0, + "epoch": 1.954, + "step": 3908 + }, + { + "loss": 0.037, + "grad_norm": 1.1176637411117554, + "learning_rate": 4.6500000000000005e-07, + "num_tokens": 1343385.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9545, + "step": 3909 + }, + { + "loss": 0.0414, + "grad_norm": 1.1863677501678467, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.955, + "step": 3910 + }, + { + "loss": 0.0697, + "grad_norm": 1.6575289964675903, + "learning_rate": 4.5500000000000004e-07, + "num_tokens": 1344409.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.9555, + "step": 3911 + }, + { + "loss": 0.0384, + "grad_norm": 1.020317554473877, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.956, + "step": 3912 + }, + { + "loss": 0.0554, + "grad_norm": 1.1557419300079346, + "learning_rate": 4.4500000000000003e-07, + "num_tokens": 1345433.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9565000000000001, + "step": 3913 + }, + { + "loss": 0.0023, + "grad_norm": 0.3282678723335266, + "learning_rate": 4.4e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9569999999999999, + "step": 3914 + }, + { + "loss": 0.0611, + "grad_norm": 1.4425996541976929, + "learning_rate": 4.35e-07, + "num_tokens": 1346036.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9575, + "step": 3915 + }, + { + "loss": 0.0021, + "grad_norm": 0.30943119525909424, + "learning_rate": 4.3e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 1.0, + "epoch": 1.958, + "step": 3916 + }, + { + "loss": 0.0021, + "grad_norm": 0.29412642121315, + "learning_rate": 4.2500000000000006e-07, + "num_tokens": 1346218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9585, + "step": 3917 + }, + { + "loss": 0.0021, + "grad_norm": 0.2940139174461365, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 3918 + }, + { + "loss": 0.0021, + "grad_norm": 0.3061344027519226, + "learning_rate": 4.1500000000000005e-07, + "num_tokens": 1346400.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9595, + "step": 3919 + }, + { + "loss": 0.0399, + "grad_norm": 1.3357733488082886, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.96, + "step": 3920 + }, + { + "loss": 0.0548, + "grad_norm": 1.1528651714324951, + "learning_rate": 4.0500000000000004e-07, + "num_tokens": 1347424.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9605000000000001, + "step": 3921 + }, + { + "loss": 0.0024, + "grad_norm": 0.3415958285331726, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9609999999999999, + "step": 3922 + }, + { + "loss": 0.0672, + "grad_norm": 1.716910719871521, + "learning_rate": 3.9500000000000003e-07, + "num_tokens": 1348027.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.9615, + "step": 3923 + }, + { + "loss": 0.0019, + "grad_norm": 0.2726108729839325, + "learning_rate": 3.9e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 1.0, + "epoch": 1.962, + "step": 3924 + }, + { + "loss": 0.0676, + "grad_norm": 1.6874312162399292, + "learning_rate": 3.85e-07, + "num_tokens": 1348630.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9625, + "step": 3925 + }, + { + "loss": 0.0677, + "grad_norm": 1.6080477237701416, + "learning_rate": 3.8e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.963, + "step": 3926 + }, + { + "loss": 0.0455, + "grad_norm": 1.2764126062393188, + "learning_rate": 3.75e-07, + "num_tokens": 1349654.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9635, + "step": 3927 + }, + { + "loss": 0.0414, + "grad_norm": 1.4081971645355225, + "learning_rate": 3.7e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 3928 + }, + { + "loss": 0.0022, + "grad_norm": 0.3177483081817627, + "learning_rate": 3.65e-07, + "num_tokens": 1350257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9645000000000001, + "step": 3929 + }, + { + "loss": 0.0024, + "grad_norm": 0.33574411273002625, + "learning_rate": 3.6e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 3930 + }, + { + "loss": 0.0024, + "grad_norm": 0.3346923887729645, + "learning_rate": 3.55e-07, + "num_tokens": 1350439.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9655, + "step": 3931 + }, + { + "loss": 0.0562, + "grad_norm": 1.2322405576705933, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.966, + "step": 3932 + }, + { + "loss": 0.0382, + "grad_norm": 1.126086711883545, + "learning_rate": 3.4500000000000003e-07, + "num_tokens": 1351463.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9665, + "step": 3933 + }, + { + "loss": 0.0679, + "grad_norm": 1.7950743436813354, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.967, + "step": 3934 + }, + { + "loss": 0.0023, + "grad_norm": 0.31813737750053406, + "learning_rate": 3.35e-07, + "num_tokens": 1352066.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9675, + "step": 3935 + }, + { + "loss": 0.0563, + "grad_norm": 1.4460132122039795, + "learning_rate": 3.3e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.968, + "step": 3936 + }, + { + "loss": 0.0388, + "grad_norm": 1.2290942668914795, + "learning_rate": 3.25e-07, + "num_tokens": 1353090.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9685000000000001, + "step": 3937 + }, + { + "loss": 0.0624, + "grad_norm": 1.2616753578186035, + "learning_rate": 3.2e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9689999999999999, + "step": 3938 + }, + { + "loss": 0.0018, + "grad_norm": 0.258317232131958, + "learning_rate": 3.15e-07, + "num_tokens": 1353693.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9695, + "step": 3939 + }, + { + "loss": 0.0021, + "grad_norm": 0.2969084680080414, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 3940 + }, + { + "loss": 0.0023, + "grad_norm": 0.3306228518486023, + "learning_rate": 3.0500000000000004e-07, + "num_tokens": 1353875.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9705, + "step": 3941 + }, + { + "loss": 0.0021, + "grad_norm": 0.2877337336540222, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 3942 + }, + { + "loss": 0.0385, + "grad_norm": 1.1180164813995361, + "learning_rate": 2.9500000000000003e-07, + "num_tokens": 1354478.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9715, + "step": 3943 + }, + { + "loss": 0.0422, + "grad_norm": 1.2713475227355957, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.972, + "step": 3944 + }, + { + "loss": 0.0021, + "grad_norm": 0.30450907349586487, + "learning_rate": 2.85e-07, + "num_tokens": 1355081.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9725000000000001, + "step": 3945 + }, + { + "loss": 0.0369, + "grad_norm": 1.0453548431396484, + "learning_rate": 2.8e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.9729999999999999, + "step": 3946 + }, + { + "loss": 0.0647, + "grad_norm": 1.4603972434997559, + "learning_rate": 2.75e-07, + "num_tokens": 1356105.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9735, + "step": 3947 + }, + { + "loss": 0.0572, + "grad_norm": 1.3418960571289062, + "learning_rate": 2.7e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.974, + "step": 3948 + }, + { + "loss": 0.0616, + "grad_norm": 1.2075037956237793, + "learning_rate": 2.65e-07, + "num_tokens": 1357129.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9745, + "step": 3949 + }, + { + "loss": 0.0561, + "grad_norm": 1.3293365240097046, + "learning_rate": 2.6e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.975, + "step": 3950 + }, + { + "loss": 0.0546, + "grad_norm": 1.1330344676971436, + "learning_rate": 2.55e-07, + "num_tokens": 1358153.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9755, + "step": 3951 + }, + { + "loss": 0.0553, + "grad_norm": 1.403975486755371, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.976, + "step": 3952 + }, + { + "loss": 0.0589, + "grad_norm": 1.0574450492858887, + "learning_rate": 2.4500000000000004e-07, + "num_tokens": 1359177.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9765000000000001, + "step": 3953 + }, + { + "loss": 0.0024, + "grad_norm": 0.34114331007003784, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9769999999999999, + "step": 3954 + }, + { + "loss": 0.0531, + "grad_norm": 1.2925927639007568, + "learning_rate": 2.3500000000000003e-07, + "num_tokens": 1359780.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9775, + "step": 3955 + }, + { + "loss": 0.0023, + "grad_norm": 0.32414519786834717, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 1.0, + "epoch": 1.978, + "step": 3956 + }, + { + "loss": 0.0409, + "grad_norm": 1.1193647384643555, + "learning_rate": 2.2500000000000002e-07, + "num_tokens": 1360383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9785, + "step": 3957 + }, + { + "loss": 0.0528, + "grad_norm": 1.0519967079162598, + "learning_rate": 2.2e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.979, + "step": 3958 + }, + { + "loss": 0.002, + "grad_norm": 0.290457159280777, + "learning_rate": 2.15e-07, + "num_tokens": 1360986.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9795, + "step": 3959 + }, + { + "loss": 0.064, + "grad_norm": 1.5267326831817627, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.98, + "step": 3960 + }, + { + "loss": 0.0571, + "grad_norm": 1.354665756225586, + "learning_rate": 2.0500000000000002e-07, + "num_tokens": 1362010.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9805000000000001, + "step": 3961 + }, + { + "loss": 0.0023, + "grad_norm": 0.3175540566444397, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9809999999999999, + "step": 3962 + }, + { + "loss": 0.0022, + "grad_norm": 0.31645578145980835, + "learning_rate": 1.95e-07, + "num_tokens": 1362192.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9815, + "step": 3963 + }, + { + "loss": 0.0023, + "grad_norm": 0.32781633734703064, + "learning_rate": 1.9e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 3964 + }, + { + "loss": 0.0022, + "grad_norm": 0.3074043393135071, + "learning_rate": 1.85e-07, + "num_tokens": 1362374.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9825, + "step": 3965 + }, + { + "loss": 0.0616, + "grad_norm": 1.3107956647872925, + "learning_rate": 1.8e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.983, + "step": 3966 + }, + { + "loss": 0.0428, + "grad_norm": 1.0233242511749268, + "learning_rate": 1.7500000000000002e-07, + "num_tokens": 1363398.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9835, + "step": 3967 + }, + { + "loss": 0.0509, + "grad_norm": 1.1120326519012451, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.984, + "step": 3968 + }, + { + "loss": 0.0578, + "grad_norm": 1.1184195280075073, + "learning_rate": 1.65e-07, + "num_tokens": 1364422.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9845000000000002, + "step": 3969 + }, + { + "loss": 0.0024, + "grad_norm": 0.3374731242656708, + "learning_rate": 1.6e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9849999999999999, + "step": 3970 + }, + { + "loss": 0.0647, + "grad_norm": 1.385146141052246, + "learning_rate": 1.5500000000000002e-07, + "num_tokens": 1365025.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9855, + "step": 3971 + }, + { + "loss": 0.0621, + "grad_norm": 1.3918462991714478, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.986, + "step": 3972 + }, + { + "loss": 0.0022, + "grad_norm": 0.3185434639453888, + "learning_rate": 1.4500000000000001e-07, + "num_tokens": 1365628.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9865, + "step": 3973 + }, + { + "loss": 0.0022, + "grad_norm": 0.3098815679550171, + "learning_rate": 1.4e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 3974 + }, + { + "loss": 0.0508, + "grad_norm": 1.1450035572052002, + "learning_rate": 1.35e-07, + "num_tokens": 1366231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9875, + "step": 3975 + }, + { + "loss": 0.0545, + "grad_norm": 1.133862018585205, + "learning_rate": 1.3e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.988, + "step": 3976 + }, + { + "loss": 0.0575, + "grad_norm": 1.3929400444030762, + "learning_rate": 1.2500000000000002e-07, + "num_tokens": 1367255.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9885000000000002, + "step": 3977 + }, + { + "loss": 0.0023, + "grad_norm": 0.32601818442344666, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9889999999999999, + "step": 3978 + }, + { + "loss": 0.0614, + "grad_norm": 1.4804233312606812, + "learning_rate": 1.1500000000000001e-07, + "num_tokens": 1367858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9895, + "step": 3979 + }, + { + "loss": 0.0339, + "grad_norm": 1.0161491632461548, + "learning_rate": 1.1e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.99, + "step": 3980 + }, + { + "loss": 0.0374, + "grad_norm": 0.9113408327102661, + "learning_rate": 1.0500000000000001e-07, + "num_tokens": 1368882.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9905, + "step": 3981 + }, + { + "loss": 0.0022, + "grad_norm": 0.31800293922424316, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 1.0, + "epoch": 1.991, + "step": 3982 + }, + { + "loss": 0.0022, + "grad_norm": 0.3091203570365906, + "learning_rate": 9.5e-08, + "num_tokens": 1369064.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9915, + "step": 3983 + }, + { + "loss": 0.0697, + "grad_norm": 1.368817687034607, + "learning_rate": 9e-08, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.992, + "step": 3984 + }, + { + "loss": 0.0024, + "grad_norm": 0.334277480840683, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1369667.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9925000000000002, + "step": 3985 + }, + { + "loss": 0.0545, + "grad_norm": 1.1396604776382446, + "learning_rate": 8e-08, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.9929999999999999, + "step": 3986 + }, + { + "loss": 0.002, + "grad_norm": 0.2931969463825226, + "learning_rate": 7.500000000000001e-08, + "num_tokens": 1370270.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9935, + "step": 3987 + }, + { + "loss": 0.0021, + "grad_norm": 0.29304033517837524, + "learning_rate": 7e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 3988 + }, + { + "loss": 0.0579, + "grad_norm": 1.3336025476455688, + "learning_rate": 6.5e-08, + "num_tokens": 1370873.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.9945, + "step": 3989 + }, + { + "loss": 0.0023, + "grad_norm": 0.3215644359588623, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 1.0, + "epoch": 1.995, + "step": 3990 + }, + { + "loss": 0.0405, + "grad_norm": 1.221953272819519, + "learning_rate": 5.5e-08, + "num_tokens": 1371476.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9955, + "step": 3991 + }, + { + "loss": 0.0404, + "grad_norm": 1.0604480504989624, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.996, + "step": 3992 + }, + { + "loss": 0.0381, + "grad_norm": 0.919835090637207, + "learning_rate": 4.5e-08, + "num_tokens": 1372500.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9965000000000002, + "step": 3993 + }, + { + "loss": 0.0378, + "grad_norm": 1.2490025758743286, + "learning_rate": 4e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.9969999999999999, + "step": 3994 + }, + { + "loss": 0.0021, + "grad_norm": 0.3125726878643036, + "learning_rate": 3.5e-08, + "num_tokens": 1373103.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9975, + "step": 3995 + }, + { + "loss": 0.0023, + "grad_norm": 0.3294070065021515, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 3996 + }, + { + "loss": 0.002, + "grad_norm": 0.2793242931365967, + "learning_rate": 2.5000000000000002e-08, + "num_tokens": 1373285.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9985, + "step": 3997 + }, + { + "loss": 0.0386, + "grad_norm": 1.0813380479812622, + "learning_rate": 2e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.999, + "step": 3998 + }, + { + "loss": 0.0025, + "grad_norm": 0.3470178544521332, + "learning_rate": 1.5000000000000002e-08, + "num_tokens": 1373888.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9995, + "step": 3999 + }, + { + "loss": 0.0681, + "grad_norm": 1.5211089849472046, + "learning_rate": 1e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 2.0, + "step": 4000 + }, + { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898, + "epoch": 2.0, + "step": 4000 + } +] \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..be089935a10e89f2cb7ed806e7c10efa3baca54a --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-1.5B-Instruct", + "unsloth_available": false, + "train_runtime": 483.7085, + "train_loss": 0.11515871361242898, + "train_metrics": { + "train_runtime": 483.7085, + "train_samples_per_second": 8.269, + "train_steps_per_second": 8.269, + "total_flos": 1.0823562289152e+16, + "train_loss": 0.11515871361242898 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json new file mode 100644 index 0000000000000000000000000000000000000000..89d5d32978be7e468119b45142923322586f281c --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json @@ -0,0 +1,149 @@ +{ + "status": "ok", + "ablations": { + "bandit_only": { + "avg_reward": 0.779625, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 2.8125, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.483125, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9056250000000008, + "exploit_detection_count": 2.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.0625, + "avg_invalid_actions": 0.0625, + "reward_columns": { + "format_compliance_score": 0.9989999999999996, + "candidate_alignment_score": 0.9989999999999996, + "legality_score": 0.9989999999999996, + "safety_delta_score": 0.483125, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999995, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000002, + "efficiency_score": 0.5855625, + "process_fidelity_score": 0.9056250000000008, + "explanation_grounding_score": 0.8000000000000004, + "anti_cheat_score": 0.9366249999999997, + "uncertainty_calibration_score": 0.8531250000000004 + }, + "primary_reward_channels": { + "safety_legality": 0.9469062499999998, + "clinical_improvement": 0.6273749999999997, + "dosing_quality": 0.6550000000000001, + "process_integrity": 0.8225937500000001 + }, + "policy_stack": "bandit-only", + "failure_mining": { + "total_rows": 32, + "failure_rows": 2, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 2 + } + ] + } + }, + "llm_only": { + "avg_reward": 0.7723913043478261, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.4882608695652174, + "avg_dosing_quality": 0.75, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.4882608695652174, + "burden_improvement_score": 0.5, + "disease_stability_score": 0.8999999999999998, + "dosing_quality_score": 0.75, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8482608695652176 + }, + "primary_reward_channels": { + "safety_legality": 0.8853478260869562, + "clinical_improvement": 0.6290869565217388, + "dosing_quality": 0.6549999999999998, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm-only", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + }, + "llm_bandit": { + "avg_reward": 0.7647391304347826, + "legality_rate": 1.0, + "severe_violation_rate": 0.0, + "abstention_rate": 0.0, + "avg_episode_length": 1.9565217391304348, + "success_rate": 0.0, + "avg_burden_delta": 0.0, + "avg_safety_delta": 0.48982608695652174, + "avg_dosing_quality": 0.717391304347826, + "avg_process_fidelity": 0.9000000000000005, + "exploit_detection_count": 7.0, + "timeout_rate": 0.0, + "failure_visible_rate": 0.30434782608695654, + "avg_invalid_actions": 0.30434782608695654, + "reward_columns": { + "format_compliance_score": 0.9989999999999999, + "candidate_alignment_score": 0.9989999999999999, + "legality_score": 0.9989999999999999, + "safety_delta_score": 0.48982608695652174, + "burden_improvement_score": 0.5043478260869565, + "disease_stability_score": 0.8582608695652173, + "dosing_quality_score": 0.717391304347826, + "abstention_quality_score": 0.5600000000000004, + "efficiency_score": 0.7027826086956522, + "process_fidelity_score": 0.9000000000000005, + "explanation_grounding_score": 0.8000000000000003, + "anti_cheat_score": 0.6952608695652175, + "uncertainty_calibration_score": 0.8126086956521739 + }, + "primary_reward_channels": { + "safety_legality": 0.8765217391304347, + "clinical_improvement": 0.6171739130434781, + "dosing_quality": 0.6386956521739129, + "process_integrity": 0.8504782608695656 + }, + "policy_stack": "llm+bandit", + "failure_mining": { + "total_rows": 23, + "failure_rows": 7, + "top_failure_reasons": [ + { + "reason": "repeated_action_loop", + "count": 7 + } + ] + } + } + } +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json new file mode 100644 index 0000000000000000000000000000000000000000..23c0af97fc904ab4981b509b57116fba4289a289 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json @@ -0,0 +1,50011 @@ +[ + { + "loss": 0.0, + "grad_norm": 0.0, + "learning_rate": 1e-06, + "num_tokens": 366.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0005, + "step": 1 + }, + { + "loss": 0.0, + "grad_norm": 0.0, + "learning_rate": 9.995e-07, + "num_tokens": 732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.001, + "step": 2 + }, + { + "loss": 0.0, + "grad_norm": 0.8386753797531128, + "learning_rate": 9.989999999999999e-07, + "num_tokens": 1628.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0015, + "step": 3 + }, + { + "loss": 0.0, + "grad_norm": 0.0008644626359455287, + "learning_rate": 9.985e-07, + "num_tokens": 1994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.515835851430893e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.002, + "step": 4 + }, + { + "loss": -0.0, + "grad_norm": 0.6266300678253174, + "learning_rate": 9.98e-07, + "num_tokens": 2890.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 1.1774711310863495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0025, + "step": 5 + }, + { + "loss": 0.0, + "grad_norm": 0.7592867612838745, + "learning_rate": 9.975e-07, + "num_tokens": 3786.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.082305192947388e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.003, + "step": 6 + }, + { + "loss": 0.0, + "grad_norm": 0.0013875153381377459, + "learning_rate": 9.97e-07, + "num_tokens": 4152.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.19076532125473e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0035, + "step": 7 + }, + { + "loss": 0.0, + "grad_norm": 0.0008181582088582218, + "learning_rate": 9.965e-07, + "num_tokens": 4518.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6560388505458832e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.004, + "step": 8 + }, + { + "loss": 0.0, + "grad_norm": 0.7382595539093018, + "learning_rate": 9.959999999999999e-07, + "num_tokens": 5414.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 1.3813376426696777e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0045, + "step": 9 + }, + { + "loss": 0.0, + "grad_norm": 0.9728567004203796, + "learning_rate": 9.955e-07, + "num_tokens": 6310.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.846500039100647, + "rewards/environment_reward_verifier/std": 0.014849219471216202, + "reward": 0.846500039100647, + "reward_std": 0.014849220402538776, + "kl": 5.137734115123749e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.005, + "step": 10 + }, + { + "loss": -0.0, + "grad_norm": 0.5461432337760925, + "learning_rate": 9.95e-07, + "num_tokens": 7206.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 1.668650656938553e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0055, + "step": 11 + }, + { + "loss": 0.0, + "grad_norm": 0.001112893340177834, + "learning_rate": 9.945e-07, + "num_tokens": 7572.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.109647125005722e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.006, + "step": 12 + }, + { + "loss": 0.0, + "grad_norm": NaN, + "learning_rate": 9.94e-07, + "num_tokens": 8468.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.0393170416355133e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0065, + "step": 13 + }, + { + "loss": 0.0, + "grad_norm": 0.0010866466909646988, + "learning_rate": 9.94e-07, + "num_tokens": 8834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.441702574491501e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.007, + "step": 14 + }, + { + "loss": 0.0, + "grad_norm": 0.001017165370285511, + "learning_rate": 9.935e-07, + "num_tokens": 9730.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.716303035616875e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0075, + "step": 15 + }, + { + "loss": 0.0, + "grad_norm": 0.6911739706993103, + "learning_rate": 9.929999999999999e-07, + "num_tokens": 10626.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 1.7061829566955566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.008, + "step": 16 + }, + { + "loss": 0.0, + "grad_norm": 0.7382009029388428, + "learning_rate": 9.925e-07, + "num_tokens": 11522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 1.5362165868282318e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0085, + "step": 17 + }, + { + "loss": 0.0, + "grad_norm": NaN, + "learning_rate": 9.92e-07, + "num_tokens": 12418.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 2.619996666908264e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.009, + "step": 18 + }, + { + "loss": 0.0, + "grad_norm": 0.0008886535069905221, + "learning_rate": 9.92e-07, + "num_tokens": 12784.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.30507755279541e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0095, + "step": 19 + }, + { + "loss": 0.0, + "grad_norm": 0.7491036057472229, + "learning_rate": 9.915e-07, + "num_tokens": 13680.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 3.322027623653412e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.01, + "step": 20 + }, + { + "loss": 0.0, + "grad_norm": 0.5928551554679871, + "learning_rate": 9.91e-07, + "num_tokens": 14576.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 2.601929008960724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0105, + "step": 21 + }, + { + "loss": 0.0, + "grad_norm": 0.0005458745290525258, + "learning_rate": 9.905e-07, + "num_tokens": 15472.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.315826714038849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.011, + "step": 22 + }, + { + "loss": 0.0, + "grad_norm": 0.000569008057937026, + "learning_rate": 9.9e-07, + "num_tokens": 15838.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.1721236407756805e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0115, + "step": 23 + }, + { + "loss": 0.0, + "grad_norm": 0.8848241567611694, + "learning_rate": 9.895e-07, + "num_tokens": 16734.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 2.0731240510940552e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.012, + "step": 24 + }, + { + "loss": 0.0, + "grad_norm": 0.9575281143188477, + "learning_rate": 9.89e-07, + "num_tokens": 17630.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.5221146643161774e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0125, + "step": 25 + }, + { + "loss": 0.0, + "grad_norm": 0.0004248635668773204, + "learning_rate": 9.885e-07, + "num_tokens": 17996.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.887790858745575e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.013, + "step": 26 + }, + { + "loss": 0.0, + "grad_norm": 0.0009508877992630005, + "learning_rate": 9.88e-07, + "num_tokens": 18362.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8277747333049774e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0135, + "step": 27 + }, + { + "loss": 0.0, + "grad_norm": 0.8627551198005676, + "learning_rate": 9.875e-07, + "num_tokens": 19258.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 5.311518907546997e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.014, + "step": 28 + }, + { + "loss": 0.0, + "grad_norm": 0.0009427251643501222, + "learning_rate": 9.87e-07, + "num_tokens": 20154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2608786821365356e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0145, + "step": 29 + }, + { + "loss": 0.0, + "grad_norm": 0.0006769588799215853, + "learning_rate": 9.865e-07, + "num_tokens": 20520.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2307969629764557e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.015, + "step": 30 + }, + { + "loss": 0.0, + "grad_norm": 0.7637265920639038, + "learning_rate": 9.86e-07, + "num_tokens": 21416.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 2.9818154871463776e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0155, + "step": 31 + }, + { + "loss": 0.0, + "grad_norm": 0.0008596409461461008, + "learning_rate": 9.855e-07, + "num_tokens": 22312.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7940000295639038, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7940000295639038, + "reward_std": 0.0, + "kl": 2.1715648472309113e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.016, + "step": 32 + }, + { + "loss": 0.0, + "grad_norm": 0.0013101330259814858, + "learning_rate": 9.849999999999999e-07, + "num_tokens": 22678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.461260348558426e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0165, + "step": 33 + }, + { + "loss": 0.0, + "grad_norm": 0.0009030819055624306, + "learning_rate": 9.845e-07, + "num_tokens": 23044.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.9451755583286285e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.017, + "step": 34 + }, + { + "loss": 0.0, + "grad_norm": 0.14603713154792786, + "learning_rate": 9.84e-07, + "num_tokens": 23940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.0006279908120632172, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0175, + "step": 35 + }, + { + "loss": 0.0, + "grad_norm": 0.9210644364356995, + "learning_rate": 9.835e-07, + "num_tokens": 24836.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.36403027176857e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.018, + "step": 36 + }, + { + "loss": 0.0, + "grad_norm": 0.001894401852041483, + "learning_rate": 9.83e-07, + "num_tokens": 25202.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.968380719423294e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0185, + "step": 37 + }, + { + "loss": 0.0, + "grad_norm": 0.002542809583246708, + "learning_rate": 9.825e-07, + "num_tokens": 25568.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.4018571972846985e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.019, + "step": 38 + }, + { + "loss": 0.0, + "grad_norm": 0.0009300168021582067, + "learning_rate": 9.819999999999999e-07, + "num_tokens": 25934.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.014877438545227e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0195, + "step": 39 + }, + { + "loss": 0.0, + "grad_norm": 0.601282000541687, + "learning_rate": 9.815e-07, + "num_tokens": 26830.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 1.4821067452430725e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.02, + "step": 40 + }, + { + "loss": 0.0, + "grad_norm": 0.0005840946105308831, + "learning_rate": 9.81e-07, + "num_tokens": 27726.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.229904592037201e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0205, + "step": 41 + }, + { + "loss": 0.0, + "grad_norm": 0.8803837299346924, + "learning_rate": 9.805e-07, + "num_tokens": 28622.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.692414611577988e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.021, + "step": 42 + }, + { + "loss": 0.0, + "grad_norm": 0.003636215114966035, + "learning_rate": 9.8e-07, + "num_tokens": 29518.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 5.9694983065128326e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0215, + "step": 43 + }, + { + "loss": 0.0, + "grad_norm": 0.001083171577192843, + "learning_rate": 9.795e-07, + "num_tokens": 29884.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.22023406624794e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.022, + "step": 44 + }, + { + "loss": 0.0, + "grad_norm": 0.0029561789706349373, + "learning_rate": 9.789999999999999e-07, + "num_tokens": 30250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5513581931591034e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0225, + "step": 45 + }, + { + "loss": 0.0, + "grad_norm": 0.8178843259811401, + "learning_rate": 9.785e-07, + "num_tokens": 31146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 2.0386651158332825e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.023, + "step": 46 + }, + { + "loss": 0.0, + "grad_norm": 0.7111838459968567, + "learning_rate": 9.78e-07, + "num_tokens": 32042.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 1.805834472179413e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0235, + "step": 47 + }, + { + "loss": 0.0, + "grad_norm": 0.0020604038145393133, + "learning_rate": 9.775e-07, + "num_tokens": 32938.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.199426621198654e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.024, + "step": 48 + }, + { + "loss": 0.0, + "grad_norm": 1.1733801364898682, + "learning_rate": 9.77e-07, + "num_tokens": 33834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8790000081062317, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8790000081062317, + "reward_std": 0.0014141954015940428, + "kl": 2.4205073714256287e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0245, + "step": 49 + }, + { + "loss": 0.0, + "grad_norm": 0.0007422183407470584, + "learning_rate": 9.765e-07, + "num_tokens": 34200.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0121224224567413e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.025, + "step": 50 + }, + { + "loss": 0.0, + "grad_norm": 0.12367633730173111, + "learning_rate": 9.759999999999998e-07, + "num_tokens": 35096.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8349999785423279, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8349999785423279, + "reward_std": 0.0, + "kl": 0.00035975873470306396, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0255, + "step": 51 + }, + { + "loss": 0.0, + "grad_norm": 1.1185871362686157, + "learning_rate": 9.755e-07, + "num_tokens": 35992.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.8584694266319275e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.026, + "step": 52 + }, + { + "loss": 0.0, + "grad_norm": NaN, + "learning_rate": 9.75e-07, + "num_tokens": 36888.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.0005854479968547821, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0265, + "step": 53 + }, + { + "loss": 0.0, + "grad_norm": 0.0010273786028847098, + "learning_rate": 9.75e-07, + "num_tokens": 37254.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.692973405122757e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.027, + "step": 54 + }, + { + "loss": 0.0, + "grad_norm": 0.0011759226908907294, + "learning_rate": 9.745e-07, + "num_tokens": 37620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.308484494686127e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0275, + "step": 55 + }, + { + "loss": 0.0, + "grad_norm": 0.0007389633101411164, + "learning_rate": 9.74e-07, + "num_tokens": 37986.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.300366759300232e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.028, + "step": 56 + }, + { + "loss": 0.0, + "grad_norm": 0.0005277986056171358, + "learning_rate": 9.735e-07, + "num_tokens": 38882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 1.1188909411430359e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0285, + "step": 57 + }, + { + "loss": 0.0, + "grad_norm": 0.0009752270416356623, + "learning_rate": 9.729999999999998e-07, + "num_tokens": 39778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 3.2201409339904785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.029, + "step": 58 + }, + { + "loss": 0.0, + "grad_norm": 0.002292782301083207, + "learning_rate": 9.725e-07, + "num_tokens": 40144.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.730653017759323e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0295, + "step": 59 + }, + { + "loss": 0.0, + "grad_norm": 0.0015361111145466566, + "learning_rate": 9.72e-07, + "num_tokens": 40510.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.377216100692749e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.03, + "step": 60 + }, + { + "loss": 0.0, + "grad_norm": 0.001204590662382543, + "learning_rate": 9.715e-07, + "num_tokens": 40876.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9032118618488312e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0305, + "step": 61 + }, + { + "loss": 0.0, + "grad_norm": 0.6760213971138, + "learning_rate": 9.709999999999999e-07, + "num_tokens": 41772.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.0381837822496891, + "reward": 0.7910000085830688, + "reward_std": 0.0381837822496891, + "kl": 8.327886462211609e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.031, + "step": 62 + }, + { + "loss": 0.0, + "grad_norm": 0.0013389871455729008, + "learning_rate": 9.705e-07, + "num_tokens": 42668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 3.366731107234955e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0315, + "step": 63 + }, + { + "loss": 0.0, + "grad_norm": 0.0007441174238920212, + "learning_rate": 9.7e-07, + "num_tokens": 43564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 9.872950613498688e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.032, + "step": 64 + }, + { + "loss": 0.0, + "grad_norm": 0.5267499685287476, + "learning_rate": 9.695e-07, + "num_tokens": 44460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 1.86040997505188e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0325, + "step": 65 + }, + { + "loss": 0.0, + "grad_norm": 0.0009887129999697208, + "learning_rate": 9.69e-07, + "num_tokens": 45356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 4.1836872696876526e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.033, + "step": 66 + }, + { + "loss": 0.0, + "grad_norm": 0.005825233645737171, + "learning_rate": 9.685e-07, + "num_tokens": 45722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.702557533979416e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0335, + "step": 67 + }, + { + "loss": 0.0, + "grad_norm": 0.0005127235781401396, + "learning_rate": 9.679999999999999e-07, + "num_tokens": 46088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5092624127864838e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.034, + "step": 68 + }, + { + "loss": 0.0, + "grad_norm": 0.001396226929500699, + "learning_rate": 9.675e-07, + "num_tokens": 46454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.394686013460159e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0345, + "step": 69 + }, + { + "loss": 0.0, + "grad_norm": 0.8930999636650085, + "learning_rate": 9.67e-07, + "num_tokens": 47350.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.071129322052002e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.035, + "step": 70 + }, + { + "loss": 0.0, + "grad_norm": 0.45665115118026733, + "learning_rate": 9.665e-07, + "num_tokens": 48246.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5920000076293945, + "rewards/environment_reward_verifier/std": 0.30122748017311096, + "reward": 0.5920000076293945, + "reward_std": 0.30122748017311096, + "kl": 1.1058524250984192e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0355, + "step": 71 + }, + { + "loss": 0.0, + "grad_norm": 0.0015513673424720764, + "learning_rate": 9.66e-07, + "num_tokens": 48612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.106216460466385e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.036, + "step": 72 + }, + { + "loss": 0.0, + "grad_norm": 0.0016105485847219825, + "learning_rate": 9.655e-07, + "num_tokens": 49508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.196112811565399e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0365, + "step": 73 + }, + { + "loss": 0.0, + "grad_norm": 0.12389198690652847, + "learning_rate": 9.649999999999999e-07, + "num_tokens": 50404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.0006226431578397751, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.037, + "step": 74 + }, + { + "loss": 0.0, + "grad_norm": 0.000441992306150496, + "learning_rate": 9.645e-07, + "num_tokens": 51300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 1.2840144336223602e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0375, + "step": 75 + }, + { + "loss": -0.0, + "grad_norm": 0.583307147026062, + "learning_rate": 9.64e-07, + "num_tokens": 52196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 1.4536082744598389e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.038, + "step": 76 + }, + { + "loss": 0.0, + "grad_norm": 0.5040392875671387, + "learning_rate": 9.635e-07, + "num_tokens": 53092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 1.9342638552188873e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0385, + "step": 77 + }, + { + "loss": 0.0, + "grad_norm": 0.0007017228053882718, + "learning_rate": 9.63e-07, + "num_tokens": 53458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.330223262310028e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.039, + "step": 78 + }, + { + "loss": 0.0, + "grad_norm": 0.0005833606119267642, + "learning_rate": 9.624999999999999e-07, + "num_tokens": 53824.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0285136997699738e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0395, + "step": 79 + }, + { + "loss": 0.0, + "grad_norm": 0.0016466780798509717, + "learning_rate": 9.619999999999999e-07, + "num_tokens": 54190.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3215077817440033e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.04, + "step": 80 + }, + { + "loss": 0.0, + "grad_norm": 0.0005939177935943007, + "learning_rate": 9.615e-07, + "num_tokens": 54556.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0177103579044342e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0405, + "step": 81 + }, + { + "loss": 0.0, + "grad_norm": 0.0015536571154370904, + "learning_rate": 9.61e-07, + "num_tokens": 55452.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.1132640540599823e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.041, + "step": 82 + }, + { + "loss": 0.0, + "grad_norm": 0.0010748868808150291, + "learning_rate": 9.605e-07, + "num_tokens": 56348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.773959517478943e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0415, + "step": 83 + }, + { + "loss": 0.0, + "grad_norm": 0.0009355363436043262, + "learning_rate": 9.6e-07, + "num_tokens": 57244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.8561800718307495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.042, + "step": 84 + }, + { + "loss": 0.0, + "grad_norm": 0.0005516069359146059, + "learning_rate": 9.594999999999999e-07, + "num_tokens": 58140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8349999785423279, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8349999785423279, + "reward_std": 0.0, + "kl": 1.7962418496608734e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0425, + "step": 85 + }, + { + "loss": 0.0, + "grad_norm": 0.0018359065288677812, + "learning_rate": 9.589999999999998e-07, + "num_tokens": 58506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.631614476442337e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.043, + "step": 86 + }, + { + "loss": 0.0, + "grad_norm": 0.003975807689130306, + "learning_rate": 9.585e-07, + "num_tokens": 58872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.361491978168488e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0435, + "step": 87 + }, + { + "loss": 0.0, + "grad_norm": 0.0010325579205527902, + "learning_rate": 9.58e-07, + "num_tokens": 59238.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.5804306864738464e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.044, + "step": 88 + }, + { + "loss": 0.0, + "grad_norm": 0.6955918669700623, + "learning_rate": 9.575e-07, + "num_tokens": 60134.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 3.2967887818813324e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0445, + "step": 89 + }, + { + "loss": 0.0, + "grad_norm": 0.01571866311132908, + "learning_rate": 9.57e-07, + "num_tokens": 61030.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.341654807329178e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.045, + "step": 90 + }, + { + "loss": 0.0, + "grad_norm": 0.0019674592185765505, + "learning_rate": 9.565e-07, + "num_tokens": 61396.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.4650398194789886e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0455, + "step": 91 + }, + { + "loss": 0.0, + "grad_norm": 0.00046162621583789587, + "learning_rate": 9.559999999999998e-07, + "num_tokens": 62292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7433037757873535e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.046, + "step": 92 + }, + { + "loss": 0.0, + "grad_norm": 0.9690912961959839, + "learning_rate": 9.555e-07, + "num_tokens": 63188.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.0381837822496891, + "reward": 0.7910000085830688, + "reward_std": 0.0381837822496891, + "kl": 2.886541187763214e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0465, + "step": 93 + }, + { + "loss": 0.0, + "grad_norm": 0.0011616102419793606, + "learning_rate": 9.55e-07, + "num_tokens": 63554.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8302893042564392e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.047, + "step": 94 + }, + { + "loss": 0.0, + "grad_norm": 0.0010602263500913978, + "learning_rate": 9.545e-07, + "num_tokens": 63920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.1570903956890106e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0475, + "step": 95 + }, + { + "loss": 0.0, + "grad_norm": 0.9153140187263489, + "learning_rate": 9.539999999999999e-07, + "num_tokens": 64816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 6.788689643144608e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.048, + "step": 96 + }, + { + "loss": 0.0, + "grad_norm": 0.45417484641075134, + "learning_rate": 9.535e-07, + "num_tokens": 65712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 1.2744218111038208e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0485, + "step": 97 + }, + { + "loss": 0.0, + "grad_norm": 0.0015867383917793632, + "learning_rate": 9.529999999999999e-07, + "num_tokens": 66078.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.906991332769394e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.049, + "step": 98 + }, + { + "loss": 0.0, + "grad_norm": 0.0007671258063055575, + "learning_rate": 9.525e-07, + "num_tokens": 66444.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7447007596492767e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0495, + "step": 99 + }, + { + "loss": 0.0, + "grad_norm": 0.0006462362944148481, + "learning_rate": 9.52e-07, + "num_tokens": 66810.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.849886029958725e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.05, + "step": 100 + }, + { + "loss": 0.0, + "grad_norm": 0.007701369468122721, + "learning_rate": 9.515e-07, + "num_tokens": 67176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.422136306762695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0505, + "step": 101 + }, + { + "loss": 0.0, + "grad_norm": 0.6700197458267212, + "learning_rate": 9.509999999999999e-07, + "num_tokens": 68072.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.818368375301361e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.051, + "step": 102 + }, + { + "loss": 0.0, + "grad_norm": 2.66556453704834, + "learning_rate": 9.504999999999999e-07, + "num_tokens": 68968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8345000147819519, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8345000147819519, + "reward_std": 0.030405579134821892, + "kl": 5.388539284467697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0515, + "step": 103 + }, + { + "loss": 0.0, + "grad_norm": 0.00044317645370028913, + "learning_rate": 9.499999999999999e-07, + "num_tokens": 69864.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 1.7177313566207886e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.052, + "step": 104 + }, + { + "loss": -0.0, + "grad_norm": 0.5687395334243774, + "learning_rate": 9.495e-07, + "num_tokens": 70760.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 1.3083219528198242e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0525, + "step": 105 + }, + { + "loss": 0.0, + "grad_norm": NaN, + "learning_rate": 9.489999999999999e-07, + "num_tokens": 71656.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 0.0011830152943730354, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.053, + "step": 106 + }, + { + "loss": 0.0, + "grad_norm": 0.01510967593640089, + "learning_rate": 9.489999999999999e-07, + "num_tokens": 72552.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.878000020980835, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.878000020980835, + "reward_std": 0.0, + "kl": 9.882543236017227e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0535, + "step": 107 + }, + { + "loss": 0.0, + "grad_norm": 0.004268075339496136, + "learning_rate": 9.485e-07, + "num_tokens": 72918.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.635075598955154e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.054, + "step": 108 + }, + { + "loss": 0.0, + "grad_norm": 0.8328304886817932, + "learning_rate": 9.479999999999999e-07, + "num_tokens": 73814.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 2.2052787244319916e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0545, + "step": 109 + }, + { + "loss": 0.0, + "grad_norm": 0.728537380695343, + "learning_rate": 9.474999999999999e-07, + "num_tokens": 74710.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8174999952316284, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8174999952316284, + "reward_std": 0.014849262312054634, + "kl": 2.4109147489070892e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.055, + "step": 110 + }, + { + "loss": 0.0, + "grad_norm": 0.9570010900497437, + "learning_rate": 9.469999999999999e-07, + "num_tokens": 75606.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8105000257492065, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8105000257492065, + "reward_std": 0.06434673070907593, + "kl": 4.696846008300781e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0555, + "step": 111 + }, + { + "loss": 0.0, + "grad_norm": 0.002002199413254857, + "learning_rate": 9.465e-07, + "num_tokens": 75972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.513189196586609e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.056, + "step": 112 + }, + { + "loss": 0.0, + "grad_norm": 0.0006786709418520331, + "learning_rate": 9.459999999999999e-07, + "num_tokens": 76868.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 2.574734389781952e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0565, + "step": 113 + }, + { + "loss": -0.0, + "grad_norm": 0.8540514707565308, + "learning_rate": 9.455e-07, + "num_tokens": 77764.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.8044999837875366, + "reward_std": 0.012020829133689404, + "kl": 2.0493753254413605e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.057, + "step": 114 + }, + { + "loss": 0.0, + "grad_norm": 0.0009922435274347663, + "learning_rate": 9.45e-07, + "num_tokens": 78130.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.318674862384796e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0575, + "step": 115 + }, + { + "loss": 0.0, + "grad_norm": 0.0007435118895955384, + "learning_rate": 9.444999999999999e-07, + "num_tokens": 79026.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.7647783756256104e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.058, + "step": 116 + }, + { + "loss": 0.0, + "grad_norm": 0.00691739609465003, + "learning_rate": 9.439999999999999e-07, + "num_tokens": 79392.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.612468183040619e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0585, + "step": 117 + }, + { + "loss": 0.0, + "grad_norm": 0.0007686293101869524, + "learning_rate": 9.434999999999999e-07, + "num_tokens": 79758.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.6792677342891693e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.059, + "step": 118 + }, + { + "loss": 0.0, + "grad_norm": 0.0017928972374647856, + "learning_rate": 9.429999999999999e-07, + "num_tokens": 80124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.409346729516983e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0595, + "step": 119 + }, + { + "loss": 0.0, + "grad_norm": 0.005726952571421862, + "learning_rate": 9.425e-07, + "num_tokens": 81020.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 7.761642336845398e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.06, + "step": 120 + }, + { + "loss": 0.0, + "grad_norm": 0.00040231458842754364, + "learning_rate": 9.419999999999999e-07, + "num_tokens": 81916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 1.92299485206604e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0605, + "step": 121 + }, + { + "loss": 0.0, + "grad_norm": 0.852346658706665, + "learning_rate": 9.415e-07, + "num_tokens": 82812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 1.8057413399219513e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.061, + "step": 122 + }, + { + "loss": 0.0, + "grad_norm": 0.0010437635937705636, + "learning_rate": 9.409999999999999e-07, + "num_tokens": 83708.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9762665033340454e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0615, + "step": 123 + }, + { + "loss": 0.0, + "grad_norm": NaN, + "learning_rate": 9.404999999999999e-07, + "num_tokens": 84604.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.055154334753751755, + "reward": 0.8389999866485596, + "reward_std": 0.055154334753751755, + "kl": 0.0007068756967782974, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.062, + "step": 124 + }, + { + "loss": 0.0, + "grad_norm": 0.6010521650314331, + "learning_rate": 9.404999999999999e-07, + "num_tokens": 85500.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 1.6216188669204712e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0625, + "step": 125 + }, + { + "loss": 0.0, + "grad_norm": 0.6753321886062622, + "learning_rate": 9.399999999999999e-07, + "num_tokens": 86396.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.6893801987171173e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.063, + "step": 126 + }, + { + "loss": 0.0, + "grad_norm": 0.0010537143098190427, + "learning_rate": 9.395e-07, + "num_tokens": 86762.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.888884723186493e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0635, + "step": 127 + }, + { + "loss": 0.0, + "grad_norm": 1.5956679582595825, + "learning_rate": 9.389999999999999e-07, + "num_tokens": 87658.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 6.039440631866455e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.064, + "step": 128 + }, + { + "loss": 0.0, + "grad_norm": 0.0013017355231568217, + "learning_rate": 9.385e-07, + "num_tokens": 88024.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.114024341106415e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0645, + "step": 129 + }, + { + "loss": 0.0, + "grad_norm": 0.6261308789253235, + "learning_rate": 9.379999999999998e-07, + "num_tokens": 88920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 7.468275725841522e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.065, + "step": 130 + }, + { + "loss": 0.0, + "grad_norm": 0.00029322251793928444, + "learning_rate": 9.374999999999999e-07, + "num_tokens": 89816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 1.0502524673938751e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0655, + "step": 131 + }, + { + "loss": 0.0, + "grad_norm": 0.0007472799625247717, + "learning_rate": 9.37e-07, + "num_tokens": 90182.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8768012523651123e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.066, + "step": 132 + }, + { + "loss": 0.0, + "grad_norm": 0.0004956374177709222, + "learning_rate": 9.365e-07, + "num_tokens": 90548.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.917034387588501e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0665, + "step": 133 + }, + { + "loss": 0.0, + "grad_norm": 0.000760928844101727, + "learning_rate": 9.36e-07, + "num_tokens": 90914.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.449060022830963e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.067, + "step": 134 + }, + { + "loss": 0.0, + "grad_norm": 0.0017298860475420952, + "learning_rate": 9.355e-07, + "num_tokens": 91280.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.187878221273422e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0675, + "step": 135 + }, + { + "loss": 0.0, + "grad_norm": 0.9310314655303955, + "learning_rate": 9.35e-07, + "num_tokens": 92176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.039597976952791214, + "reward": 0.8500000238418579, + "reward_std": 0.039597976952791214, + "kl": 2.9511749744415283e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.068, + "step": 136 + }, + { + "loss": 0.0, + "grad_norm": 0.5498940944671631, + "learning_rate": 9.344999999999999e-07, + "num_tokens": 93072.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 1.553259789943695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0685, + "step": 137 + }, + { + "loss": 0.0, + "grad_norm": 0.8820034265518188, + "learning_rate": 9.34e-07, + "num_tokens": 93968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.5233253836631775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.069, + "step": 138 + }, + { + "loss": 0.0, + "grad_norm": 0.0006268341676332057, + "learning_rate": 9.334999999999999e-07, + "num_tokens": 94334.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.2475218176841736e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0695, + "step": 139 + }, + { + "loss": 0.0, + "grad_norm": 0.7416382431983948, + "learning_rate": 9.33e-07, + "num_tokens": 95230.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8240000009536743, + "rewards/environment_reward_verifier/std": 0.015556317754089832, + "reward": 0.8240000009536743, + "reward_std": 0.015556317754089832, + "kl": 2.3412518203258514e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.07, + "step": 140 + }, + { + "loss": 0.0, + "grad_norm": 0.4844658374786377, + "learning_rate": 9.325e-07, + "num_tokens": 96126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8339999914169312, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8339999914169312, + "reward_std": 0.0014141954015940428, + "kl": 7.013790309429169e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0705, + "step": 141 + }, + { + "loss": 0.0, + "grad_norm": 0.8294029235839844, + "learning_rate": 9.32e-07, + "num_tokens": 97022.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8355000019073486, + "reward_std": 0.030405579134821892, + "kl": 1.283455640077591e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.071, + "step": 142 + }, + { + "loss": 0.0, + "grad_norm": 0.0005975551321171224, + "learning_rate": 9.315e-07, + "num_tokens": 97388.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9866973161697388e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0715, + "step": 143 + }, + { + "loss": 0.0, + "grad_norm": 0.0004532081075012684, + "learning_rate": 9.31e-07, + "num_tokens": 97754.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.086162567138672e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.072, + "step": 144 + }, + { + "loss": 0.0, + "grad_norm": 0.0003843473386950791, + "learning_rate": 9.304999999999999e-07, + "num_tokens": 98120.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.2605907917022705e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0725, + "step": 145 + }, + { + "loss": 0.0, + "grad_norm": 0.0036340798251330853, + "learning_rate": 9.3e-07, + "num_tokens": 98486.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.931608706712723e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.073, + "step": 146 + }, + { + "loss": 0.0, + "grad_norm": 0.00095866754418239, + "learning_rate": 9.295e-07, + "num_tokens": 98852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.259442746639252e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0735, + "step": 147 + }, + { + "loss": 0.0, + "grad_norm": 0.000992271350696683, + "learning_rate": 9.29e-07, + "num_tokens": 99218.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.275942385196686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.074, + "step": 148 + }, + { + "loss": 0.0, + "grad_norm": 0.0008247334626503289, + "learning_rate": 9.285e-07, + "num_tokens": 99584.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.442727029323578e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0745, + "step": 149 + }, + { + "loss": 0.0, + "grad_norm": 0.611395537853241, + "learning_rate": 9.28e-07, + "num_tokens": 100480.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.7994999885559082, + "reward_std": 0.016263457015156746, + "kl": 1.0479241609573364e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.075, + "step": 150 + }, + { + "loss": 0.0, + "grad_norm": 0.0008024791022762656, + "learning_rate": 9.274999999999999e-07, + "num_tokens": 100846.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.54213809967041e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0755, + "step": 151 + }, + { + "loss": 0.0, + "grad_norm": 0.0008570189820602536, + "learning_rate": 9.27e-07, + "num_tokens": 101212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1021423637866974e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.076, + "step": 152 + }, + { + "loss": 0.0, + "grad_norm": 6.0001912117004395, + "learning_rate": 9.264999999999999e-07, + "num_tokens": 102108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8125, + "rewards/environment_reward_verifier/std": 0.01060659158974886, + "reward": 0.8125, + "reward_std": 0.01060659158974886, + "kl": 6.32014125585556e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0765, + "step": 153 + }, + { + "loss": 0.0, + "grad_norm": 0.7252357602119446, + "learning_rate": 9.26e-07, + "num_tokens": 103004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.2156164050102234e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.077, + "step": 154 + }, + { + "loss": 0.0, + "grad_norm": 0.0008979981648735702, + "learning_rate": 9.255e-07, + "num_tokens": 103370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.1005201637744904e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0775, + "step": 155 + }, + { + "loss": 0.0, + "grad_norm": 0.0010244681034237146, + "learning_rate": 9.25e-07, + "num_tokens": 103736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.6143697798252106e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.078, + "step": 156 + }, + { + "loss": 0.0, + "grad_norm": 0.7005264759063721, + "learning_rate": 9.244999999999999e-07, + "num_tokens": 104632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5975000262260437, + "rewards/environment_reward_verifier/std": 0.3047630488872528, + "reward": 0.5975000262260437, + "reward_std": 0.3047630488872528, + "kl": 2.7914531528949738e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0785, + "step": 157 + }, + { + "loss": 0.0, + "grad_norm": 0.6544285416603088, + "learning_rate": 9.24e-07, + "num_tokens": 105528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 5.729496479034424e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.079, + "step": 158 + }, + { + "loss": 0.0, + "grad_norm": 0.5623617768287659, + "learning_rate": 9.234999999999999e-07, + "num_tokens": 106424.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 2.0192936062812805e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0795, + "step": 159 + }, + { + "loss": 0.0, + "grad_norm": 0.0007258378900587559, + "learning_rate": 9.23e-07, + "num_tokens": 107320.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.202896237373352e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.08, + "step": 160 + }, + { + "loss": 0.0, + "grad_norm": 0.0027602105401456356, + "learning_rate": 9.225e-07, + "num_tokens": 108216.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 7.052719593048096e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0805, + "step": 161 + }, + { + "loss": 0.0, + "grad_norm": 0.73163241147995, + "learning_rate": 9.22e-07, + "num_tokens": 109112.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 2.2308900952339172e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.081, + "step": 162 + }, + { + "loss": 0.0, + "grad_norm": 0.0011337499599903822, + "learning_rate": 9.215e-07, + "num_tokens": 109478.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.859695374965668e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0815, + "step": 163 + }, + { + "loss": 0.0, + "grad_norm": 0.000912423012778163, + "learning_rate": 9.21e-07, + "num_tokens": 109844.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.218837082386017e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.082, + "step": 164 + }, + { + "loss": 0.0002, + "grad_norm": 8.715468406677246, + "learning_rate": 9.204999999999999e-07, + "num_tokens": 110740.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.004041045904159546, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0825, + "step": 165 + }, + { + "loss": 0.0, + "grad_norm": 0.9052450656890869, + "learning_rate": 9.2e-07, + "num_tokens": 111636.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.215965211391449e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.083, + "step": 166 + }, + { + "loss": 0.0, + "grad_norm": 0.0003241814556531608, + "learning_rate": 9.194999999999999e-07, + "num_tokens": 112002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.0592862963676453e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0835, + "step": 167 + }, + { + "loss": 0.0, + "grad_norm": 1.2795896530151367, + "learning_rate": 9.19e-07, + "num_tokens": 112898.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 7.838010787963867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.084, + "step": 168 + }, + { + "loss": 0.0, + "grad_norm": 0.0004557027714326978, + "learning_rate": 9.185e-07, + "num_tokens": 113794.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 2.0915642380714417e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0845, + "step": 169 + }, + { + "loss": -0.0, + "grad_norm": 0.7115015387535095, + "learning_rate": 9.18e-07, + "num_tokens": 114690.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 3.168080002069473e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.085, + "step": 170 + }, + { + "loss": 0.0, + "grad_norm": 0.0009462831658311188, + "learning_rate": 9.174999999999999e-07, + "num_tokens": 115056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.907550126314163e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0855, + "step": 171 + }, + { + "loss": 0.0, + "grad_norm": 0.0008878710796125233, + "learning_rate": 9.17e-07, + "num_tokens": 115422.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.062335938215256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.086, + "step": 172 + }, + { + "loss": 0.0, + "grad_norm": 0.8355982303619385, + "learning_rate": 9.164999999999999e-07, + "num_tokens": 116318.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 2.7638860046863556e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0865, + "step": 173 + }, + { + "loss": 0.0, + "grad_norm": 0.0008515037479810417, + "learning_rate": 9.16e-07, + "num_tokens": 116684.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.111641854047775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.087, + "step": 174 + }, + { + "loss": 0.0, + "grad_norm": 0.000702428980730474, + "learning_rate": 9.155e-07, + "num_tokens": 117580.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6394613087177277e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0875, + "step": 175 + }, + { + "loss": 0.0, + "grad_norm": 0.0007754422258585691, + "learning_rate": 9.15e-07, + "num_tokens": 118476.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 3.0298717319965363e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.088, + "step": 176 + }, + { + "loss": 0.0, + "grad_norm": 0.7931095361709595, + "learning_rate": 9.145e-07, + "num_tokens": 119372.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.3398548364639282e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0885, + "step": 177 + }, + { + "loss": 0.0, + "grad_norm": 0.0012435466051101685, + "learning_rate": 9.14e-07, + "num_tokens": 120268.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.037097096443176e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.089, + "step": 178 + }, + { + "loss": 0.0, + "grad_norm": 0.0008868267759680748, + "learning_rate": 9.134999999999999e-07, + "num_tokens": 120634.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6998110115528107e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0895, + "step": 179 + }, + { + "loss": 0.0, + "grad_norm": 0.7282891273498535, + "learning_rate": 9.13e-07, + "num_tokens": 121530.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.5174580514431e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.09, + "step": 180 + }, + { + "loss": 0.0, + "grad_norm": 0.7231186628341675, + "learning_rate": 9.124999999999999e-07, + "num_tokens": 122426.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 1.848861575126648e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0905, + "step": 181 + }, + { + "loss": 0.0, + "grad_norm": 0.001117244246415794, + "learning_rate": 9.12e-07, + "num_tokens": 122792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.138743340969086e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.091, + "step": 182 + }, + { + "loss": 0.0, + "grad_norm": 0.0006556922453455627, + "learning_rate": 9.115e-07, + "num_tokens": 123688.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9136816263198853e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0915, + "step": 183 + }, + { + "loss": 0.0, + "grad_norm": 0.000802351045422256, + "learning_rate": 9.109999999999999e-07, + "num_tokens": 124054.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.238752156496048e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.092, + "step": 184 + }, + { + "loss": 0.0, + "grad_norm": 0.0006063154432922602, + "learning_rate": 9.104999999999999e-07, + "num_tokens": 124420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.0485371351242065e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0925, + "step": 185 + }, + { + "loss": 0.0, + "grad_norm": 0.7436572313308716, + "learning_rate": 9.1e-07, + "num_tokens": 125316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.107769250869751e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.093, + "step": 186 + }, + { + "loss": 0.0, + "grad_norm": 0.0014243351761251688, + "learning_rate": 9.094999999999999e-07, + "num_tokens": 126212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.3363310396671295e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0935, + "step": 187 + }, + { + "loss": 0.0, + "grad_norm": 0.0009731510654091835, + "learning_rate": 9.09e-07, + "num_tokens": 127108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 2.2524036467075348e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.094, + "step": 188 + }, + { + "loss": 0.0, + "grad_norm": 0.0008247564546763897, + "learning_rate": 9.085e-07, + "num_tokens": 127474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4750828742980957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0945, + "step": 189 + }, + { + "loss": 0.0, + "grad_norm": 0.898916482925415, + "learning_rate": 9.08e-07, + "num_tokens": 128370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.03111271932721138, + "reward": 0.828000009059906, + "reward_std": 0.03111271932721138, + "kl": 2.9124319553375244e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.095, + "step": 190 + }, + { + "loss": 0.0, + "grad_norm": 0.0022594723850488663, + "learning_rate": 9.074999999999999e-07, + "num_tokens": 128736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.931740790605545e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0955, + "step": 191 + }, + { + "loss": 0.0002, + "grad_norm": 0.3122554123401642, + "learning_rate": 9.07e-07, + "num_tokens": 129632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.005375564098358154, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.096, + "step": 192 + }, + { + "loss": 0.0, + "grad_norm": 0.7383635640144348, + "learning_rate": 9.064999999999999e-07, + "num_tokens": 130528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 1.7085112631320953e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0965, + "step": 193 + }, + { + "loss": 0.0, + "grad_norm": 0.0009169039549306035, + "learning_rate": 9.06e-07, + "num_tokens": 130894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7499161660671234e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.097, + "step": 194 + }, + { + "loss": 0.0, + "grad_norm": 0.002207833109423518, + "learning_rate": 9.055e-07, + "num_tokens": 131790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 5.058012902736664e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0975, + "step": 195 + }, + { + "loss": 0.0, + "grad_norm": 0.0013476760359480977, + "learning_rate": 9.05e-07, + "num_tokens": 132156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.07582488656044e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.098, + "step": 196 + }, + { + "loss": 0.0, + "grad_norm": 0.0009443381568416953, + "learning_rate": 9.045e-07, + "num_tokens": 132522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.524923861026764e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0985, + "step": 197 + }, + { + "loss": 0.0, + "grad_norm": 0.0008005110430531204, + "learning_rate": 9.039999999999999e-07, + "num_tokens": 133418.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.380049020051956e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.099, + "step": 198 + }, + { + "loss": 0.0, + "grad_norm": 0.0011344518279656768, + "learning_rate": 9.034999999999999e-07, + "num_tokens": 134314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.630202263593674e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.0995, + "step": 199 + }, + { + "loss": 0.0, + "grad_norm": 1.124922513961792, + "learning_rate": 9.03e-07, + "num_tokens": 135210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 2.403371036052704e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1, + "step": 200 + }, + { + "loss": 0.0, + "grad_norm": 0.010462634265422821, + "learning_rate": 9.024999999999999e-07, + "num_tokens": 135576.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.151548147201538e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1005, + "step": 201 + }, + { + "loss": 0.0, + "grad_norm": 0.4031621813774109, + "learning_rate": 9.02e-07, + "num_tokens": 136472.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 7.29784369468689e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.101, + "step": 202 + }, + { + "loss": 0.0, + "grad_norm": 1.1457958221435547, + "learning_rate": 9.015e-07, + "num_tokens": 137368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8125, + "rewards/environment_reward_verifier/std": 0.01060659158974886, + "reward": 0.8125, + "reward_std": 0.01060659158974886, + "kl": 7.96811655163765e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1015, + "step": 203 + }, + { + "loss": -0.0, + "grad_norm": 0.8547003865242004, + "learning_rate": 9.01e-07, + "num_tokens": 138264.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7669999599456787, + "rewards/environment_reward_verifier/std": 0.00424262834712863, + "reward": 0.7669999599456787, + "reward_std": 0.00424262834712863, + "kl": 4.733167588710785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.102, + "step": 204 + }, + { + "loss": 0.0, + "grad_norm": 0.0010702295694500208, + "learning_rate": 9.004999999999999e-07, + "num_tokens": 139160.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8516165912151337e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1025, + "step": 205 + }, + { + "loss": 0.0, + "grad_norm": 0.0010671066120266914, + "learning_rate": 9e-07, + "num_tokens": 140056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 2.7094967663288116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.103, + "step": 206 + }, + { + "loss": 0.0, + "grad_norm": 0.6986727714538574, + "learning_rate": 8.994999999999999e-07, + "num_tokens": 140952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 2.9342249035835266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1035, + "step": 207 + }, + { + "loss": 0.0, + "grad_norm": 0.793999433517456, + "learning_rate": 8.99e-07, + "num_tokens": 141848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8004999756813049, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.8004999756813049, + "reward_std": 0.04879037290811539, + "kl": 2.9208138585090637e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.104, + "step": 208 + }, + { + "loss": 0.0, + "grad_norm": 0.8776720762252808, + "learning_rate": 8.985e-07, + "num_tokens": 142744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.694409340620041e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1045, + "step": 209 + }, + { + "loss": 0.0, + "grad_norm": 0.8799023628234863, + "learning_rate": 8.98e-07, + "num_tokens": 143640.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 3.313366323709488e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.105, + "step": 210 + }, + { + "loss": 0.0, + "grad_norm": 0.0004170483734924346, + "learning_rate": 8.974999999999999e-07, + "num_tokens": 144536.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2648833692073822e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1055, + "step": 211 + }, + { + "loss": 0.0, + "grad_norm": 0.001837296411395073, + "learning_rate": 8.969999999999999e-07, + "num_tokens": 144902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6456080377101898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.106, + "step": 212 + }, + { + "loss": 0.0, + "grad_norm": 0.0008451686589978635, + "learning_rate": 8.964999999999999e-07, + "num_tokens": 145268.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.107171505689621e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1065, + "step": 213 + }, + { + "loss": 0.0, + "grad_norm": 1.0017951726913452, + "learning_rate": 8.96e-07, + "num_tokens": 146164.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 2.7408823370933533e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.107, + "step": 214 + }, + { + "loss": 0.0, + "grad_norm": 0.8755594491958618, + "learning_rate": 8.954999999999999e-07, + "num_tokens": 147060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.390146255493164e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1075, + "step": 215 + }, + { + "loss": 0.0, + "grad_norm": 0.0005800517974421382, + "learning_rate": 8.95e-07, + "num_tokens": 147426.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.6012229025363922e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.108, + "step": 216 + }, + { + "loss": 0.0, + "grad_norm": 0.0007062573567964137, + "learning_rate": 8.945e-07, + "num_tokens": 147792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.4564174711704254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1085, + "step": 217 + }, + { + "loss": 0.0, + "grad_norm": 0.003949970938265324, + "learning_rate": 8.939999999999999e-07, + "num_tokens": 148688.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.277564585208893e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.109, + "step": 218 + }, + { + "loss": 0.0, + "grad_norm": 0.004211249761283398, + "learning_rate": 8.934999999999999e-07, + "num_tokens": 149054.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 0.00011921580880880356, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1095, + "step": 219 + }, + { + "loss": 0.0, + "grad_norm": 0.0019470448605716228, + "learning_rate": 8.93e-07, + "num_tokens": 149420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.409812390804291e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.11, + "step": 220 + }, + { + "loss": 0.0, + "grad_norm": 0.001696808380074799, + "learning_rate": 8.924999999999999e-07, + "num_tokens": 150316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.481617361307144e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1105, + "step": 221 + }, + { + "loss": 0.0, + "grad_norm": 0.0008031058823689818, + "learning_rate": 8.92e-07, + "num_tokens": 150682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.823770046234131e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.111, + "step": 222 + }, + { + "loss": 0.0, + "grad_norm": 0.0005426830030046403, + "learning_rate": 8.915e-07, + "num_tokens": 151048.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.190050721168518e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1115, + "step": 223 + }, + { + "loss": 0.0, + "grad_norm": 0.7660623788833618, + "learning_rate": 8.91e-07, + "num_tokens": 151944.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8400000333786011, + "rewards/environment_reward_verifier/std": 0.056568533182144165, + "reward": 0.8400000333786011, + "reward_std": 0.056568533182144165, + "kl": 2.423767000436783e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.112, + "step": 224 + }, + { + "loss": 0.0, + "grad_norm": 0.00114248541649431, + "learning_rate": 8.904999999999999e-07, + "num_tokens": 152310.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.911981523036957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1125, + "step": 225 + }, + { + "loss": 0.0, + "grad_norm": 0.0010189404711127281, + "learning_rate": 8.9e-07, + "num_tokens": 153206.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7940000295639038, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7940000295639038, + "reward_std": 0.0, + "kl": 3.969017416238785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.113, + "step": 226 + }, + { + "loss": 0.0, + "grad_norm": 0.0009496210259385407, + "learning_rate": 8.894999999999999e-07, + "num_tokens": 154102.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 3.453809767961502e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1135, + "step": 227 + }, + { + "loss": 0.0, + "grad_norm": 0.0009968357626348734, + "learning_rate": 8.89e-07, + "num_tokens": 154468.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.2302771210670471e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.114, + "step": 228 + }, + { + "loss": 0.0, + "grad_norm": 0.0009216134203597903, + "learning_rate": 8.884999999999999e-07, + "num_tokens": 154834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4216249585151672e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1145, + "step": 229 + }, + { + "loss": 0.0, + "grad_norm": 0.0013800781453028321, + "learning_rate": 8.88e-07, + "num_tokens": 155200.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.5048614740371704e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.115, + "step": 230 + }, + { + "loss": 0.0, + "grad_norm": 0.004977535456418991, + "learning_rate": 8.874999999999999e-07, + "num_tokens": 155566.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.366932600736618e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1155, + "step": 231 + }, + { + "loss": 0.0, + "grad_norm": 0.6765887141227722, + "learning_rate": 8.869999999999999e-07, + "num_tokens": 156462.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8345000147819519, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8345000147819519, + "reward_std": 0.030405579134821892, + "kl": 2.278340980410576e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.116, + "step": 232 + }, + { + "loss": 0.0, + "grad_norm": 0.0009554218268021941, + "learning_rate": 8.864999999999999e-07, + "num_tokens": 156828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.304945468902588e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1165, + "step": 233 + }, + { + "loss": 0.0, + "grad_norm": 0.0004711175861302763, + "learning_rate": 8.86e-07, + "num_tokens": 157724.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8140000104904175, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8140000104904175, + "reward_std": 0.0, + "kl": 2.018176019191742e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.117, + "step": 234 + }, + { + "loss": 0.0, + "grad_norm": 0.7974148392677307, + "learning_rate": 8.854999999999999e-07, + "num_tokens": 158620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 4.5554712414741516e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1175, + "step": 235 + }, + { + "loss": 0.0, + "grad_norm": 0.7260931730270386, + "learning_rate": 8.85e-07, + "num_tokens": 159516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 7.259659469127655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.118, + "step": 236 + }, + { + "loss": 0.0, + "grad_norm": 0.6996958255767822, + "learning_rate": 8.845e-07, + "num_tokens": 160412.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.2821128368377686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1185, + "step": 237 + }, + { + "loss": 0.0, + "grad_norm": 0.004671283531934023, + "learning_rate": 8.839999999999999e-07, + "num_tokens": 160778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.2873045206069946e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.119, + "step": 238 + }, + { + "loss": 0.0, + "grad_norm": 0.0009693849133327603, + "learning_rate": 8.834999999999999e-07, + "num_tokens": 161144.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.379303961992264e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1195, + "step": 239 + }, + { + "loss": 0.0, + "grad_norm": 0.0009250525617972016, + "learning_rate": 8.83e-07, + "num_tokens": 161510.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9317645132541656e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.12, + "step": 240 + }, + { + "loss": 0.0, + "grad_norm": 0.650233805179596, + "learning_rate": 8.824999999999999e-07, + "num_tokens": 162406.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 1.8423423171043396e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1205, + "step": 241 + }, + { + "loss": 0.0, + "grad_norm": 0.7992975115776062, + "learning_rate": 8.82e-07, + "num_tokens": 163302.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8105000257492065, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8105000257492065, + "reward_std": 0.06434673070907593, + "kl": 3.829877823591232e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.121, + "step": 242 + }, + { + "loss": 0.0, + "grad_norm": 0.9677534699440002, + "learning_rate": 8.814999999999999e-07, + "num_tokens": 164198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 3.436487168073654e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1215, + "step": 243 + }, + { + "loss": 0.0, + "grad_norm": 0.0007884668302722275, + "learning_rate": 8.81e-07, + "num_tokens": 165094.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.169981598854065e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.122, + "step": 244 + }, + { + "loss": 0.0, + "grad_norm": 0.000979329226538539, + "learning_rate": 8.804999999999999e-07, + "num_tokens": 165460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.646461457014084e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1225, + "step": 245 + }, + { + "loss": 0.0, + "grad_norm": 0.0006126004736870527, + "learning_rate": 8.799999999999999e-07, + "num_tokens": 166356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 3.476254642009735e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.123, + "step": 246 + }, + { + "loss": 0.0, + "grad_norm": 0.0011434931075200438, + "learning_rate": 8.794999999999999e-07, + "num_tokens": 166722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.4108910262584686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1235, + "step": 247 + }, + { + "loss": 0.0001, + "grad_norm": 5.088333606719971, + "learning_rate": 8.79e-07, + "num_tokens": 167618.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 0.0014105839654803276, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.124, + "step": 248 + }, + { + "loss": 0.0, + "grad_norm": 0.8565078973770142, + "learning_rate": 8.784999999999999e-07, + "num_tokens": 168514.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8144999742507935, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8144999742507935, + "reward_std": 0.0035355305299162865, + "kl": 4.782341420650482e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1245, + "step": 249 + }, + { + "loss": 0.0, + "grad_norm": 0.7004273533821106, + "learning_rate": 8.78e-07, + "num_tokens": 169410.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 1.3789162039756775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.125, + "step": 250 + }, + { + "loss": 0.0, + "grad_norm": 0.0018229980487376451, + "learning_rate": 8.774999999999999e-07, + "num_tokens": 169776.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.895271897315979e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1255, + "step": 251 + }, + { + "loss": 0.0, + "grad_norm": 0.001281239208765328, + "learning_rate": 8.769999999999999e-07, + "num_tokens": 170142.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.564210444688797e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.126, + "step": 252 + }, + { + "loss": 0.0, + "grad_norm": 0.001548050669953227, + "learning_rate": 8.764999999999999e-07, + "num_tokens": 170508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.354771226644516e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1265, + "step": 253 + }, + { + "loss": 0.0, + "grad_norm": 0.6451208591461182, + "learning_rate": 8.76e-07, + "num_tokens": 171404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 5.1419250667095184e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.127, + "step": 254 + }, + { + "loss": 0.0, + "grad_norm": 0.8378592729568481, + "learning_rate": 8.754999999999999e-07, + "num_tokens": 172300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 2.724677324295044e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1275, + "step": 255 + }, + { + "loss": 0.0, + "grad_norm": 0.000880461884662509, + "learning_rate": 8.75e-07, + "num_tokens": 172666.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9389746487140656e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.128, + "step": 256 + }, + { + "loss": 0.0, + "grad_norm": 0.8155960440635681, + "learning_rate": 8.745000000000001e-07, + "num_tokens": 173562.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.646407276391983e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1285, + "step": 257 + }, + { + "loss": 0.0, + "grad_norm": 2.756582260131836, + "learning_rate": 8.739999999999999e-07, + "num_tokens": 174458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.0011248448863625526, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.129, + "step": 258 + }, + { + "loss": 0.0, + "grad_norm": 0.0006294223130680621, + "learning_rate": 8.735e-07, + "num_tokens": 174824.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4514272809028625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1295, + "step": 259 + }, + { + "loss": 0.0, + "grad_norm": 0.0005847606807947159, + "learning_rate": 8.729999999999999e-07, + "num_tokens": 175720.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.0250288546085358e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.13, + "step": 260 + }, + { + "loss": 0.0, + "grad_norm": 0.006465958897024393, + "learning_rate": 8.725e-07, + "num_tokens": 176086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.9011392295360565e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1305, + "step": 261 + }, + { + "loss": 0.0, + "grad_norm": 0.0006706174463033676, + "learning_rate": 8.72e-07, + "num_tokens": 176452.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6035122573375702e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.131, + "step": 262 + }, + { + "loss": 0.0, + "grad_norm": 0.0024853611830621958, + "learning_rate": 8.715e-07, + "num_tokens": 177348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.193271398544312e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1315, + "step": 263 + }, + { + "loss": 0.0, + "grad_norm": 0.990795373916626, + "learning_rate": 8.71e-07, + "num_tokens": 178244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00011088699102401733, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.132, + "step": 264 + }, + { + "loss": 0.0, + "grad_norm": 0.6023589968681335, + "learning_rate": 8.705e-07, + "num_tokens": 179140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8100000023841858, + "rewards/environment_reward_verifier/std": 0.014142122119665146, + "reward": 0.8100000023841858, + "reward_std": 0.014142122119665146, + "kl": 2.4791806936264038e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1325, + "step": 265 + }, + { + "loss": 0.0, + "grad_norm": 0.0006478002178482711, + "learning_rate": 8.699999999999999e-07, + "num_tokens": 180036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.0393246561288834e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.133, + "step": 266 + }, + { + "loss": 0.0, + "grad_norm": 0.0003633753804024309, + "learning_rate": 8.695e-07, + "num_tokens": 180932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 1.7292797565460205e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1335, + "step": 267 + }, + { + "loss": 0.0, + "grad_norm": 0.0009483444155193865, + "learning_rate": 8.69e-07, + "num_tokens": 181298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.2349489629268646e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.134, + "step": 268 + }, + { + "loss": 0.0, + "grad_norm": 0.001294833142310381, + "learning_rate": 8.685e-07, + "num_tokens": 182194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.401896148920059e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1345, + "step": 269 + }, + { + "loss": 0.0, + "grad_norm": 0.9378226399421692, + "learning_rate": 8.68e-07, + "num_tokens": 183090.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6110000014305115, + "rewards/environment_reward_verifier/std": 0.32809752225875854, + "reward": 0.6110000014305115, + "reward_std": 0.32809752225875854, + "kl": 4.177261143922806e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.135, + "step": 270 + }, + { + "loss": 0.0, + "grad_norm": 0.0011398299830034375, + "learning_rate": 8.675000000000001e-07, + "num_tokens": 183456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9952265322208405e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1355, + "step": 271 + }, + { + "loss": 0.0, + "grad_norm": 0.7210366725921631, + "learning_rate": 8.669999999999999e-07, + "num_tokens": 184352.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8004999756813049, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.8004999756813049, + "reward_std": 0.04879037290811539, + "kl": 2.8699636459350586e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.136, + "step": 272 + }, + { + "loss": 0.0, + "grad_norm": 0.0038134672213345766, + "learning_rate": 8.665e-07, + "num_tokens": 185248.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 7.503852248191833e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1365, + "step": 273 + }, + { + "loss": 0.0004, + "grad_norm": 4.846627712249756, + "learning_rate": 8.659999999999999e-07, + "num_tokens": 186144.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.010152775794267654, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.137, + "step": 274 + }, + { + "loss": 0.0, + "grad_norm": 0.0009844097075983882, + "learning_rate": 8.655e-07, + "num_tokens": 187040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 2.0081177353858948e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1375, + "step": 275 + }, + { + "loss": 0.0, + "grad_norm": 0.000961087818723172, + "learning_rate": 8.65e-07, + "num_tokens": 187406.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8001144528388977e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.138, + "step": 276 + }, + { + "loss": 0.0, + "grad_norm": 0.7714813947677612, + "learning_rate": 8.645e-07, + "num_tokens": 188302.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7940000295639038, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7940000295639038, + "reward_std": 0.04949747025966644, + "kl": 4.729442298412323e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1385, + "step": 277 + }, + { + "loss": 0.0, + "grad_norm": 0.0010638447711244226, + "learning_rate": 8.639999999999999e-07, + "num_tokens": 188668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.445947706699371e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.139, + "step": 278 + }, + { + "loss": 0.0, + "grad_norm": 0.00015246507246047258, + "learning_rate": 8.635e-07, + "num_tokens": 189564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 5.039386451244354e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1395, + "step": 279 + }, + { + "loss": 0.0, + "grad_norm": 0.0011137727415189147, + "learning_rate": 8.629999999999999e-07, + "num_tokens": 190460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1976960599422455e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.14, + "step": 280 + }, + { + "loss": 0.0, + "grad_norm": 0.0009709048317745328, + "learning_rate": 8.625e-07, + "num_tokens": 191356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4955254048109055e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1405, + "step": 281 + }, + { + "loss": 0.0, + "grad_norm": 1.3368643522262573, + "learning_rate": 8.62e-07, + "num_tokens": 192252.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 0.00012401491403579712, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.141, + "step": 282 + }, + { + "loss": 0.0, + "grad_norm": 0.0008055974612943828, + "learning_rate": 8.615e-07, + "num_tokens": 192618.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.564862370491028e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1415, + "step": 283 + }, + { + "loss": 0.0, + "grad_norm": 0.8562883734703064, + "learning_rate": 8.61e-07, + "num_tokens": 193514.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5985000133514404, + "rewards/environment_reward_verifier/std": 0.3047630488872528, + "reward": 0.5985000133514404, + "reward_std": 0.3047630488872528, + "kl": 2.085510641336441e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.142, + "step": 284 + }, + { + "loss": 0.0, + "grad_norm": 0.0013000740436837077, + "learning_rate": 8.605e-07, + "num_tokens": 193880.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.2595206499099731e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1425, + "step": 285 + }, + { + "loss": 0.0, + "grad_norm": 0.0014716209843754768, + "learning_rate": 8.599999999999999e-07, + "num_tokens": 194246.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.012588083744049e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.143, + "step": 286 + }, + { + "loss": 0.0, + "grad_norm": 0.6238701343536377, + "learning_rate": 8.595e-07, + "num_tokens": 195142.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 3.501400351524353e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1435, + "step": 287 + }, + { + "loss": 0.0, + "grad_norm": 0.7292160987854004, + "learning_rate": 8.59e-07, + "num_tokens": 196038.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 3.310106694698334e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.144, + "step": 288 + }, + { + "loss": 0.0, + "grad_norm": 1.2664096355438232, + "learning_rate": 8.585e-07, + "num_tokens": 196934.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 7.172953337430954e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1445, + "step": 289 + }, + { + "loss": 0.0, + "grad_norm": 0.0011152090737596154, + "learning_rate": 8.58e-07, + "num_tokens": 197300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.239380359649658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.145, + "step": 290 + }, + { + "loss": 0.0, + "grad_norm": 0.0012550086248666048, + "learning_rate": 8.575e-07, + "num_tokens": 198196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.109592944383621e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1455, + "step": 291 + }, + { + "loss": 0.0, + "grad_norm": 0.001699145999737084, + "learning_rate": 8.569999999999999e-07, + "num_tokens": 198562.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.172844976186752e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.146, + "step": 292 + }, + { + "loss": 0.0, + "grad_norm": 0.0014436126220971346, + "learning_rate": 8.565e-07, + "num_tokens": 199458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 2.7905218303203583e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1465, + "step": 293 + }, + { + "loss": 0.0, + "grad_norm": 1.060386300086975, + "learning_rate": 8.559999999999999e-07, + "num_tokens": 200354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 2.4184584617614746e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.147, + "step": 294 + }, + { + "loss": 0.0, + "grad_norm": 2.5308566093444824, + "learning_rate": 8.555e-07, + "num_tokens": 201250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.0004968792200088501, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1475, + "step": 295 + }, + { + "loss": 0.0, + "grad_norm": 0.01867598481476307, + "learning_rate": 8.55e-07, + "num_tokens": 202146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 0.0007902001962065697, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.148, + "step": 296 + }, + { + "loss": 0.0, + "grad_norm": 0.676836371421814, + "learning_rate": 8.545e-07, + "num_tokens": 203042.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 2.4565495550632477e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1485, + "step": 297 + }, + { + "loss": 0.0, + "grad_norm": 0.000486809789435938, + "learning_rate": 8.539999999999999e-07, + "num_tokens": 203938.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 1.8110498785972595e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.149, + "step": 298 + }, + { + "loss": 0.0, + "grad_norm": 6.314117431640625, + "learning_rate": 8.535e-07, + "num_tokens": 204834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 0.000560510903596878, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1495, + "step": 299 + }, + { + "loss": 0.0, + "grad_norm": 0.0016245761653408408, + "learning_rate": 8.529999999999999e-07, + "num_tokens": 205730.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.596170037984848e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.15, + "step": 300 + }, + { + "loss": 0.0, + "grad_norm": 4.8842644691467285, + "learning_rate": 8.525e-07, + "num_tokens": 206626.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 0.0012828148901462555, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1505, + "step": 301 + }, + { + "loss": 0.0, + "grad_norm": 0.6496160626411438, + "learning_rate": 8.52e-07, + "num_tokens": 207522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 1.8990598618984222e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.151, + "step": 302 + }, + { + "loss": 0.0, + "grad_norm": 1.2166204452514648, + "learning_rate": 8.515e-07, + "num_tokens": 208418.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.263874143362045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1515, + "step": 303 + }, + { + "loss": 0.0, + "grad_norm": 0.6483629941940308, + "learning_rate": 8.51e-07, + "num_tokens": 209314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.642868250608444e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.152, + "step": 304 + }, + { + "loss": 0.0, + "grad_norm": 0.08719047904014587, + "learning_rate": 8.504999999999999e-07, + "num_tokens": 210210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00048297271132469177, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1525, + "step": 305 + }, + { + "loss": 0.0, + "grad_norm": 0.0009118872112594545, + "learning_rate": 8.499999999999999e-07, + "num_tokens": 211106.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 3.436300903558731e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.153, + "step": 306 + }, + { + "loss": 0.0, + "grad_norm": 0.000776519300416112, + "learning_rate": 8.495e-07, + "num_tokens": 212002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.836909309029579e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1535, + "step": 307 + }, + { + "loss": 0.0, + "grad_norm": 0.0004030209092888981, + "learning_rate": 8.489999999999999e-07, + "num_tokens": 212898.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 1.1263415217399597e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.154, + "step": 308 + }, + { + "loss": 0.0, + "grad_norm": 0.0021231588907539845, + "learning_rate": 8.485e-07, + "num_tokens": 213264.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.808364272117615e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1545, + "step": 309 + }, + { + "loss": 0.0, + "grad_norm": 0.0010731469374150038, + "learning_rate": 8.48e-07, + "num_tokens": 213630.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3443793654441833e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.155, + "step": 310 + }, + { + "loss": 0.0, + "grad_norm": 1.3191975355148315, + "learning_rate": 8.475e-07, + "num_tokens": 214526.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 0.0001062760129570961, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1555, + "step": 311 + }, + { + "loss": 0.0, + "grad_norm": 0.0009143484639935195, + "learning_rate": 8.469999999999999e-07, + "num_tokens": 214892.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7162954211235046e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.156, + "step": 312 + }, + { + "loss": 0.0, + "grad_norm": 0.0008549138437956572, + "learning_rate": 8.465e-07, + "num_tokens": 215258.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.628060221672058e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1565, + "step": 313 + }, + { + "loss": 0.0, + "grad_norm": 0.8807721138000488, + "learning_rate": 8.459999999999999e-07, + "num_tokens": 216154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 3.3076852560043335e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.157, + "step": 314 + }, + { + "loss": 0.0, + "grad_norm": 0.0011269906535744667, + "learning_rate": 8.455e-07, + "num_tokens": 216520.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.0779042541980743e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1575, + "step": 315 + }, + { + "loss": 0.0, + "grad_norm": 0.0009529910748824477, + "learning_rate": 8.45e-07, + "num_tokens": 216886.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9197894036769867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.158, + "step": 316 + }, + { + "loss": 0.0, + "grad_norm": 0.5073452591896057, + "learning_rate": 8.445e-07, + "num_tokens": 217782.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 1.5504658222198486e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1585, + "step": 317 + }, + { + "loss": 0.0, + "grad_norm": 0.6745843887329102, + "learning_rate": 8.439999999999999e-07, + "num_tokens": 218678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 2.916809171438217e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.159, + "step": 318 + }, + { + "loss": 0.0, + "grad_norm": 0.83416348695755, + "learning_rate": 8.435e-07, + "num_tokens": 219574.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.966502845287323e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1595, + "step": 319 + }, + { + "loss": 0.0, + "grad_norm": 0.0005657601868733764, + "learning_rate": 8.429999999999999e-07, + "num_tokens": 219940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.7073936760425568e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.16, + "step": 320 + }, + { + "loss": 0.0, + "grad_norm": 0.0019271780038252473, + "learning_rate": 8.425e-07, + "num_tokens": 220306.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.132891237735748e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1605, + "step": 321 + }, + { + "loss": 0.0, + "grad_norm": 0.7732903957366943, + "learning_rate": 8.419999999999999e-07, + "num_tokens": 221202.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 2.4759210646152496e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.161, + "step": 322 + }, + { + "loss": 0.0, + "grad_norm": 0.4706270098686218, + "learning_rate": 8.415e-07, + "num_tokens": 222098.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 1.8648803234100342e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1615, + "step": 323 + }, + { + "loss": 0.0, + "grad_norm": 0.9665089249610901, + "learning_rate": 8.41e-07, + "num_tokens": 222994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.0028283908031880856, + "reward": 0.8149999976158142, + "reward_std": 0.0028283908031880856, + "kl": 6.84782862663269e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.162, + "step": 324 + }, + { + "loss": 0.0, + "grad_norm": 0.7919329404830933, + "learning_rate": 8.404999999999999e-07, + "num_tokens": 223890.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.8199999928474426, + "reward_std": 0.011313731782138348, + "kl": 2.195313572883606e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1625, + "step": 325 + }, + { + "loss": 0.0, + "grad_norm": 0.768720269203186, + "learning_rate": 8.399999999999999e-07, + "num_tokens": 224786.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.016607999801636e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.163, + "step": 326 + }, + { + "loss": 0.0, + "grad_norm": 1.0923116207122803, + "learning_rate": 8.395e-07, + "num_tokens": 225682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 6.390083581209183e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1635, + "step": 327 + }, + { + "loss": 0.0, + "grad_norm": 0.8083785772323608, + "learning_rate": 8.389999999999999e-07, + "num_tokens": 226578.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.3585744202136993e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.164, + "step": 328 + }, + { + "loss": 0.0, + "grad_norm": 0.8358509540557861, + "learning_rate": 8.385e-07, + "num_tokens": 227474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.7976930141448975e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1645, + "step": 329 + }, + { + "loss": 0.0, + "grad_norm": 0.002556774066761136, + "learning_rate": 8.38e-07, + "num_tokens": 228370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 6.252247840166092e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.165, + "step": 330 + }, + { + "loss": 0.0, + "grad_norm": 0.0011076935334131122, + "learning_rate": 8.375e-07, + "num_tokens": 228736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.133954644203186e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1655, + "step": 331 + }, + { + "loss": 0.0, + "grad_norm": 0.8899944424629211, + "learning_rate": 8.369999999999999e-07, + "num_tokens": 229632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 3.0472874641418457e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.166, + "step": 332 + }, + { + "loss": 0.0, + "grad_norm": 0.0005512312054634094, + "learning_rate": 8.365e-07, + "num_tokens": 230528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 1.4659948647022247e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1665, + "step": 333 + }, + { + "loss": 0.0, + "grad_norm": 1.0276963710784912, + "learning_rate": 8.359999999999999e-07, + "num_tokens": 231424.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8019999861717224, + "rewards/environment_reward_verifier/std": 0.05091170594096184, + "reward": 0.8019999861717224, + "reward_std": 0.05091170594096184, + "kl": 5.741789937019348e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.167, + "step": 334 + }, + { + "loss": 0.0, + "grad_norm": 0.0006771369371563196, + "learning_rate": 8.355e-07, + "num_tokens": 231790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.835450530052185e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1675, + "step": 335 + }, + { + "loss": 0.0, + "grad_norm": 0.005562920588999987, + "learning_rate": 8.349999999999999e-07, + "num_tokens": 232156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00012410897761583328, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.168, + "step": 336 + }, + { + "loss": 0.0, + "grad_norm": 0.0008655060082674026, + "learning_rate": 8.345e-07, + "num_tokens": 233052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.971423625946045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1685, + "step": 337 + }, + { + "loss": 0.0, + "grad_norm": 0.0011268710950389504, + "learning_rate": 8.34e-07, + "num_tokens": 233418.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.94646418094635e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.169, + "step": 338 + }, + { + "loss": 0.0, + "grad_norm": 0.0010772187961265445, + "learning_rate": 8.334999999999999e-07, + "num_tokens": 234314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 3.5460107028484344e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1695, + "step": 339 + }, + { + "loss": 0.0, + "grad_norm": 0.0008576549007557333, + "learning_rate": 8.329999999999999e-07, + "num_tokens": 235210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.149647429585457e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.17, + "step": 340 + }, + { + "loss": 0.0, + "grad_norm": 3.0028762817382812, + "learning_rate": 8.325e-07, + "num_tokens": 236106.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 0.0004530055448412895, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1705, + "step": 341 + }, + { + "loss": 0.0, + "grad_norm": 0.707438588142395, + "learning_rate": 8.319999999999999e-07, + "num_tokens": 237002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 2.5334767997264862e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.171, + "step": 342 + }, + { + "loss": 0.0, + "grad_norm": 0.001074684434570372, + "learning_rate": 8.315e-07, + "num_tokens": 237368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.078673034906387e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1715, + "step": 343 + }, + { + "loss": 0.0, + "grad_norm": 0.0007710942882113159, + "learning_rate": 8.31e-07, + "num_tokens": 237734.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.07280570268631e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.172, + "step": 344 + }, + { + "loss": 0.0, + "grad_norm": 0.0015255279140546918, + "learning_rate": 8.304999999999999e-07, + "num_tokens": 238100.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6513822376728058e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1725, + "step": 345 + }, + { + "loss": 0.0, + "grad_norm": 0.001760940533131361, + "learning_rate": 8.299999999999999e-07, + "num_tokens": 238466.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.8121437430381775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.173, + "step": 346 + }, + { + "loss": 0.0, + "grad_norm": 0.5609378814697266, + "learning_rate": 8.295e-07, + "num_tokens": 239362.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 2.7747824788093567e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1735, + "step": 347 + }, + { + "loss": 0.0, + "grad_norm": 0.6798244118690491, + "learning_rate": 8.289999999999999e-07, + "num_tokens": 240258.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 1.994706690311432e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.174, + "step": 348 + }, + { + "loss": 0.0, + "grad_norm": 0.0006170056294649839, + "learning_rate": 8.285e-07, + "num_tokens": 241154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5138258934020996e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1745, + "step": 349 + }, + { + "loss": 0.0, + "grad_norm": 0.8250600695610046, + "learning_rate": 8.28e-07, + "num_tokens": 242050.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7994999885559082, + "reward_std": 0.04879037290811539, + "kl": 2.6516150683164597e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.175, + "step": 350 + }, + { + "loss": 0.0, + "grad_norm": 0.8256682753562927, + "learning_rate": 8.275e-07, + "num_tokens": 242946.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 4.840269684791565e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1755, + "step": 351 + }, + { + "loss": 0.0, + "grad_norm": 0.0038211841601878405, + "learning_rate": 8.269999999999999e-07, + "num_tokens": 243312.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.904119461774826e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.176, + "step": 352 + }, + { + "loss": 0.0, + "grad_norm": 0.0007045888341963291, + "learning_rate": 8.264999999999999e-07, + "num_tokens": 243678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.098510205745697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1765, + "step": 353 + }, + { + "loss": 0.0, + "grad_norm": 0.0005108074401505291, + "learning_rate": 8.259999999999999e-07, + "num_tokens": 244574.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 1.8666498363018036e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.177, + "step": 354 + }, + { + "loss": 0.0, + "grad_norm": 0.0017009348375722766, + "learning_rate": 8.255e-07, + "num_tokens": 244940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.8428384363651276e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1775, + "step": 355 + }, + { + "loss": 0.0, + "grad_norm": 0.0009280358208343387, + "learning_rate": 8.249999999999999e-07, + "num_tokens": 245306.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.047621041536331e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.178, + "step": 356 + }, + { + "loss": 0.0, + "grad_norm": 0.0006316198268905282, + "learning_rate": 8.245e-07, + "num_tokens": 245672.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.312939614057541e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1785, + "step": 357 + }, + { + "loss": 0.0, + "grad_norm": 0.0008523969445377588, + "learning_rate": 8.24e-07, + "num_tokens": 246568.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 2.503208816051483e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.179, + "step": 358 + }, + { + "loss": 0.0, + "grad_norm": 0.607419490814209, + "learning_rate": 8.234999999999999e-07, + "num_tokens": 247464.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 2.709217369556427e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1795, + "step": 359 + }, + { + "loss": 0.0, + "grad_norm": 0.0016844611382111907, + "learning_rate": 8.229999999999999e-07, + "num_tokens": 248360.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.207249730825424e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.18, + "step": 360 + }, + { + "loss": 0.0, + "grad_norm": 0.0022826315835118294, + "learning_rate": 8.225e-07, + "num_tokens": 248726.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.5075081288814545e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1805, + "step": 361 + }, + { + "loss": 0.0, + "grad_norm": 0.871046245098114, + "learning_rate": 8.219999999999999e-07, + "num_tokens": 249622.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.359986633062363e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.181, + "step": 362 + }, + { + "loss": 0.0, + "grad_norm": 0.0007096790359355509, + "learning_rate": 8.215e-07, + "num_tokens": 249988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.1784566342830658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1815, + "step": 363 + }, + { + "loss": 0.0, + "grad_norm": 0.5757960677146912, + "learning_rate": 8.21e-07, + "num_tokens": 250884.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5734999775886536, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5734999775886536, + "reward_std": 0.27082186937332153, + "kl": 2.105068415403366e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.182, + "step": 364 + }, + { + "loss": 0.0, + "grad_norm": 0.0026919955853372812, + "learning_rate": 8.205e-07, + "num_tokens": 251250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.663597792387009e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1825, + "step": 365 + }, + { + "loss": 0.0, + "grad_norm": 0.00391238322481513, + "learning_rate": 8.199999999999999e-07, + "num_tokens": 251616.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.422881364822388e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.183, + "step": 366 + }, + { + "loss": 0.0, + "grad_norm": 0.0019929648842662573, + "learning_rate": 8.194999999999999e-07, + "num_tokens": 251982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.68716025352478e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1835, + "step": 367 + }, + { + "loss": 0.0, + "grad_norm": 0.001186743495054543, + "learning_rate": 8.189999999999999e-07, + "num_tokens": 252348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.436580300331116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.184, + "step": 368 + }, + { + "loss": 0.0, + "grad_norm": 0.4352464973926544, + "learning_rate": 8.185e-07, + "num_tokens": 253244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 1.8279068171977997e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1845, + "step": 369 + }, + { + "loss": -0.0, + "grad_norm": 0.6293253302574158, + "learning_rate": 8.179999999999999e-07, + "num_tokens": 254140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 2.9394403100013733e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.185, + "step": 370 + }, + { + "loss": 0.0, + "grad_norm": 0.768975019454956, + "learning_rate": 8.175e-07, + "num_tokens": 255036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8185000419616699, + "rewards/environment_reward_verifier/std": 0.004949768073856831, + "reward": 0.8185000419616699, + "reward_std": 0.004949768073856831, + "kl": 1.7375685274600983e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1855, + "step": 371 + }, + { + "loss": 0.0, + "grad_norm": 0.001828294014558196, + "learning_rate": 8.169999999999999e-07, + "num_tokens": 255932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 0.00010107597336173058, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.186, + "step": 372 + }, + { + "loss": 0.0, + "grad_norm": 0.805023729801178, + "learning_rate": 8.164999999999999e-07, + "num_tokens": 256828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8100000023841858, + "rewards/environment_reward_verifier/std": 0.014142122119665146, + "reward": 0.8100000023841858, + "reward_std": 0.014142122119665146, + "kl": 4.6405941247940063e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1865, + "step": 373 + }, + { + "loss": 0.0, + "grad_norm": 0.0008711764821782708, + "learning_rate": 8.159999999999999e-07, + "num_tokens": 257194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.0335580706596375e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.187, + "step": 374 + }, + { + "loss": 0.0, + "grad_norm": 0.0011456962674856186, + "learning_rate": 8.155e-07, + "num_tokens": 257560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.436300903558731e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1875, + "step": 375 + }, + { + "loss": 0.0, + "grad_norm": 0.0034832863602787256, + "learning_rate": 8.149999999999999e-07, + "num_tokens": 258456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.579514592885971e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.188, + "step": 376 + }, + { + "loss": 0.0, + "grad_norm": 0.0008365235989913344, + "learning_rate": 8.145e-07, + "num_tokens": 258822.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.2242387533187866e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1885, + "step": 377 + }, + { + "loss": 0.0, + "grad_norm": 0.0003608646511565894, + "learning_rate": 8.14e-07, + "num_tokens": 259188.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.0672956705093384e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.189, + "step": 378 + }, + { + "loss": 0.0, + "grad_norm": 0.0010314263636246324, + "learning_rate": 8.134999999999999e-07, + "num_tokens": 259554.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.590209573507309e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1895, + "step": 379 + }, + { + "loss": 0.0, + "grad_norm": 0.0008526266319677234, + "learning_rate": 8.129999999999999e-07, + "num_tokens": 259920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.283882349729538e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.19, + "step": 380 + }, + { + "loss": 0.0, + "grad_norm": 0.0007325659971684217, + "learning_rate": 8.125e-07, + "num_tokens": 260816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.8174912333488464e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1905, + "step": 381 + }, + { + "loss": 0.0, + "grad_norm": 0.715529203414917, + "learning_rate": 8.12e-07, + "num_tokens": 261712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 1.8450431525707245e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.191, + "step": 382 + }, + { + "loss": 0.0, + "grad_norm": 0.8371534943580627, + "learning_rate": 8.115e-07, + "num_tokens": 262608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8245000243186951, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8245000243186951, + "reward_std": 0.016263457015156746, + "kl": 1.7014332115650177e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1915, + "step": 383 + }, + { + "loss": 0.0, + "grad_norm": 0.0020516454242169857, + "learning_rate": 8.11e-07, + "num_tokens": 262974.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.929730832576752e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.192, + "step": 384 + }, + { + "loss": 0.0, + "grad_norm": 0.9516167640686035, + "learning_rate": 8.105e-07, + "num_tokens": 263870.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 5.2636489272117615e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1925, + "step": 385 + }, + { + "loss": 0.0, + "grad_norm": 0.0009887670166790485, + "learning_rate": 8.1e-07, + "num_tokens": 264766.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8140000104904175, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8140000104904175, + "reward_std": 0.0, + "kl": 2.835039049386978e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.193, + "step": 386 + }, + { + "loss": 0.0001, + "grad_norm": 5.623652935028076, + "learning_rate": 8.094999999999999e-07, + "num_tokens": 265662.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 0.0014997078105807304, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1935, + "step": 387 + }, + { + "loss": 0.0, + "grad_norm": 0.0015900827711448073, + "learning_rate": 8.09e-07, + "num_tokens": 266558.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 4.941131919622421e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.194, + "step": 388 + }, + { + "loss": 0.0, + "grad_norm": 0.793515682220459, + "learning_rate": 8.085e-07, + "num_tokens": 267454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5975000262260437, + "rewards/environment_reward_verifier/std": 0.3047630488872528, + "reward": 0.5975000262260437, + "reward_std": 0.3047630488872528, + "kl": 3.597978502511978e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1945, + "step": 389 + }, + { + "loss": 0.0, + "grad_norm": 0.8414768576622009, + "learning_rate": 8.08e-07, + "num_tokens": 268350.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 4.779640585184097e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.195, + "step": 390 + }, + { + "loss": 0.0, + "grad_norm": 0.0028182165697216988, + "learning_rate": 8.075e-07, + "num_tokens": 268716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.616325557231903e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1955, + "step": 391 + }, + { + "loss": 0.0, + "grad_norm": 0.0008592616650275886, + "learning_rate": 8.070000000000001e-07, + "num_tokens": 269082.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4487264454364777e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.196, + "step": 392 + }, + { + "loss": 0.0, + "grad_norm": 2.569565534591675, + "learning_rate": 8.064999999999999e-07, + "num_tokens": 269978.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 0.00014215800911188126, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1965, + "step": 393 + }, + { + "loss": 0.0, + "grad_norm": 0.0010324495378881693, + "learning_rate": 8.06e-07, + "num_tokens": 270344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.629457205533981e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.197, + "step": 394 + }, + { + "loss": 0.0, + "grad_norm": 0.8608807325363159, + "learning_rate": 8.055e-07, + "num_tokens": 271240.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 7.563550025224686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1975, + "step": 395 + }, + { + "loss": 0.0, + "grad_norm": 0.0005319091724231839, + "learning_rate": 8.05e-07, + "num_tokens": 272136.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8986018151044846e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.198, + "step": 396 + }, + { + "loss": 0.0, + "grad_norm": 0.0007893664878793061, + "learning_rate": 8.045e-07, + "num_tokens": 273032.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8220000267028809, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8220000267028809, + "reward_std": 0.0, + "kl": 2.1637417376041412e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1985, + "step": 397 + }, + { + "loss": 0.0, + "grad_norm": 0.00043877126881852746, + "learning_rate": 8.04e-07, + "num_tokens": 273928.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8969178199768066e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.199, + "step": 398 + }, + { + "loss": 0.0, + "grad_norm": 0.0025300285778939724, + "learning_rate": 8.034999999999999e-07, + "num_tokens": 274294.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.670768976211548e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.1995, + "step": 399 + }, + { + "loss": 0.0001, + "grad_norm": 3.579826831817627, + "learning_rate": 8.03e-07, + "num_tokens": 275190.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.0013754144310951233, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2, + "step": 400 + }, + { + "loss": 0.0, + "grad_norm": 0.0024137054570019245, + "learning_rate": 8.024999999999999e-07, + "num_tokens": 275556.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.208755075931549e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2005, + "step": 401 + }, + { + "loss": -0.0, + "grad_norm": 0.8765020370483398, + "learning_rate": 8.02e-07, + "num_tokens": 276452.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8194999694824219, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.8194999694824219, + "reward_std": 0.012020829133689404, + "kl": 3.9509497582912445e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.201, + "step": 402 + }, + { + "loss": 0.0, + "grad_norm": 0.8817614316940308, + "learning_rate": 8.015e-07, + "num_tokens": 277348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 1.7669983208179474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2015, + "step": 403 + }, + { + "loss": 0.0, + "grad_norm": 0.5131192207336426, + "learning_rate": 8.01e-07, + "num_tokens": 278244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 2.452544867992401e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.202, + "step": 404 + }, + { + "loss": 0.0, + "grad_norm": 0.9266701340675354, + "learning_rate": 8.005e-07, + "num_tokens": 279140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.136042505502701e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2025, + "step": 405 + }, + { + "loss": 0.0, + "grad_norm": 0.0010275949025526643, + "learning_rate": 8e-07, + "num_tokens": 280036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.6168843507766724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.203, + "step": 406 + }, + { + "loss": 0.0, + "grad_norm": 0.020822610706090927, + "learning_rate": 7.994999999999999e-07, + "num_tokens": 280932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 0.00020745676010847092, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2035, + "step": 407 + }, + { + "loss": 0.0, + "grad_norm": 0.001042524934746325, + "learning_rate": 7.99e-07, + "num_tokens": 281298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.959572106599808e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.204, + "step": 408 + }, + { + "loss": 0.0, + "grad_norm": 0.000953489972744137, + "learning_rate": 7.985e-07, + "num_tokens": 281664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.811329275369644e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2045, + "step": 409 + }, + { + "loss": 0.0, + "grad_norm": 0.0007455811137333512, + "learning_rate": 7.98e-07, + "num_tokens": 282560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 1.9179657101631165e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.205, + "step": 410 + }, + { + "loss": 0.0, + "grad_norm": 0.9579814672470093, + "learning_rate": 7.975e-07, + "num_tokens": 283456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.659166395664215e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2055, + "step": 411 + }, + { + "loss": 0.0, + "grad_norm": 0.005196427460759878, + "learning_rate": 7.970000000000001e-07, + "num_tokens": 283822.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.4914351999759674e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.206, + "step": 412 + }, + { + "loss": 0.0, + "grad_norm": 0.002247238764539361, + "learning_rate": 7.964999999999999e-07, + "num_tokens": 284718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7940000295639038, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7940000295639038, + "reward_std": 0.0, + "kl": 5.231797695159912e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2065, + "step": 413 + }, + { + "loss": 0.0, + "grad_norm": 0.006796940229833126, + "learning_rate": 7.96e-07, + "num_tokens": 285614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 0.0001318659633398056, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.207, + "step": 414 + }, + { + "loss": 0.0, + "grad_norm": 0.0011936328373849392, + "learning_rate": 7.954999999999999e-07, + "num_tokens": 285980.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.434864968061447e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2075, + "step": 415 + }, + { + "loss": 0.0, + "grad_norm": 0.0012174234725534916, + "learning_rate": 7.95e-07, + "num_tokens": 286346.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.835279494524002e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.208, + "step": 416 + }, + { + "loss": 0.0, + "grad_norm": 3.123206377029419, + "learning_rate": 7.945e-07, + "num_tokens": 287242.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8385000228881836, + "rewards/environment_reward_verifier/std": 0.026162952184677124, + "reward": 0.8385000228881836, + "reward_std": 0.026162952184677124, + "kl": 0.0003110067918896675, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2085, + "step": 417 + }, + { + "loss": 0.0, + "grad_norm": 0.004384323488920927, + "learning_rate": 7.94e-07, + "num_tokens": 288138.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 9.18898731470108e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.209, + "step": 418 + }, + { + "loss": 0.0, + "grad_norm": 0.4957750141620636, + "learning_rate": 7.934999999999999e-07, + "num_tokens": 289034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8100000023841858, + "rewards/environment_reward_verifier/std": 0.014142122119665146, + "reward": 0.8100000023841858, + "reward_std": 0.014142122119665146, + "kl": 1.3055279850959778e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2095, + "step": 419 + }, + { + "loss": 0.0, + "grad_norm": 0.00771497655659914, + "learning_rate": 7.93e-07, + "num_tokens": 289400.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00016101356595754623, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.21, + "step": 420 + }, + { + "loss": 0.0, + "grad_norm": 0.0010974898468703032, + "learning_rate": 7.924999999999999e-07, + "num_tokens": 289766.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.816730946302414e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2105, + "step": 421 + }, + { + "loss": 0.0, + "grad_norm": 0.798469603061676, + "learning_rate": 7.92e-07, + "num_tokens": 290662.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6200000047683716, + "rewards/environment_reward_verifier/std": 0.33516862988471985, + "reward": 0.6200000047683716, + "reward_std": 0.33516862988471985, + "kl": 3.2133422791957855e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.211, + "step": 422 + }, + { + "loss": 0.0, + "grad_norm": 0.00414931820705533, + "learning_rate": 7.915e-07, + "num_tokens": 291028.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.758436888456345e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2115, + "step": 423 + }, + { + "loss": 0.0, + "grad_norm": 0.9511045217514038, + "learning_rate": 7.91e-07, + "num_tokens": 291924.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 0.00012452621012926102, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.212, + "step": 424 + }, + { + "loss": 0.0001, + "grad_norm": 0.2232443392276764, + "learning_rate": 7.905e-07, + "num_tokens": 292820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.0015941644087433815, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2125, + "step": 425 + }, + { + "loss": 0.0, + "grad_norm": 0.002064876724034548, + "learning_rate": 7.9e-07, + "num_tokens": 293716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 6.643123924732208e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.213, + "step": 426 + }, + { + "loss": 0.0, + "grad_norm": 0.0006416325340978801, + "learning_rate": 7.894999999999999e-07, + "num_tokens": 294082.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.880766987800598e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2135, + "step": 427 + }, + { + "loss": 0.0, + "grad_norm": 0.0009233696036972106, + "learning_rate": 7.89e-07, + "num_tokens": 294448.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.7785619497299194e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.214, + "step": 428 + }, + { + "loss": 0.0, + "grad_norm": 0.001352763269096613, + "learning_rate": 7.884999999999999e-07, + "num_tokens": 294814.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.464682519435883e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2145, + "step": 429 + }, + { + "loss": 0.0, + "grad_norm": 0.8443479537963867, + "learning_rate": 7.88e-07, + "num_tokens": 295710.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.9816292226314545e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.215, + "step": 430 + }, + { + "loss": 0.0, + "grad_norm": 0.0007101478986442089, + "learning_rate": 7.875e-07, + "num_tokens": 296076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.693571150302887e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2155, + "step": 431 + }, + { + "loss": 0.0, + "grad_norm": 0.0009829180780798197, + "learning_rate": 7.87e-07, + "num_tokens": 296972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8159999847412109, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8159999847412109, + "reward_std": 0.0, + "kl": 2.2660940885543823e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.216, + "step": 432 + }, + { + "loss": 0.0, + "grad_norm": 1.2148209810256958, + "learning_rate": 7.864999999999999e-07, + "num_tokens": 297868.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8259999752044678, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.8259999752044678, + "reward_std": 0.01272792648524046, + "kl": 3.0270777642726898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2165, + "step": 433 + }, + { + "loss": 0.0, + "grad_norm": 0.0008294544531963766, + "learning_rate": 7.86e-07, + "num_tokens": 298234.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.230106085538864e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.217, + "step": 434 + }, + { + "loss": 0.0, + "grad_norm": 0.0017025723354890943, + "learning_rate": 7.854999999999999e-07, + "num_tokens": 298600.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.0699727833271027e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2175, + "step": 435 + }, + { + "loss": 0.0, + "grad_norm": 0.0008352863951586187, + "learning_rate": 7.85e-07, + "num_tokens": 298966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4608725905418396e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.218, + "step": 436 + }, + { + "loss": 0.0, + "grad_norm": 0.7234691381454468, + "learning_rate": 7.845e-07, + "num_tokens": 299862.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.358442336320877e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2185, + "step": 437 + }, + { + "loss": 0.0, + "grad_norm": 0.5953369736671448, + "learning_rate": 7.84e-07, + "num_tokens": 300758.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8339999914169312, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8339999914169312, + "reward_std": 0.0014141954015940428, + "kl": 2.1354295313358307e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.219, + "step": 438 + }, + { + "loss": 0.0, + "grad_norm": 0.0006108077359385788, + "learning_rate": 7.834999999999999e-07, + "num_tokens": 301124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.793261617422104e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2195, + "step": 439 + }, + { + "loss": 0.0, + "grad_norm": 0.003298780182376504, + "learning_rate": 7.83e-07, + "num_tokens": 301490.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.4461339712142944e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.22, + "step": 440 + }, + { + "loss": 0.0, + "grad_norm": 1.0496840476989746, + "learning_rate": 7.824999999999999e-07, + "num_tokens": 302386.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.3274834752082825e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2205, + "step": 441 + }, + { + "loss": 0.0, + "grad_norm": 0.751266598701477, + "learning_rate": 7.82e-07, + "num_tokens": 303282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 3.72203066945076e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.221, + "step": 442 + }, + { + "loss": 0.0, + "grad_norm": 0.0010550552979111671, + "learning_rate": 7.815e-07, + "num_tokens": 303648.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.893168807029724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2215, + "step": 443 + }, + { + "loss": 0.0, + "grad_norm": 3.197258234024048, + "learning_rate": 7.81e-07, + "num_tokens": 304544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8240000009536743, + "rewards/environment_reward_verifier/std": 0.015556317754089832, + "reward": 0.8240000009536743, + "reward_std": 0.015556317754089832, + "kl": 2.9307790100574493e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.222, + "step": 444 + }, + { + "loss": 0.0, + "grad_norm": 0.001131376950070262, + "learning_rate": 7.805e-07, + "num_tokens": 304910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.5722587704658508e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2225, + "step": 445 + }, + { + "loss": 0.0, + "grad_norm": 1.027177333831787, + "learning_rate": 7.799999999999999e-07, + "num_tokens": 305806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.660377115011215e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.223, + "step": 446 + }, + { + "loss": 0.0, + "grad_norm": 1.4935749769210815, + "learning_rate": 7.794999999999999e-07, + "num_tokens": 306702.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 4.15164977312088e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2235, + "step": 447 + }, + { + "loss": 0.0, + "grad_norm": 0.0008162088342942297, + "learning_rate": 7.79e-07, + "num_tokens": 307068.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.881605178117752e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.224, + "step": 448 + }, + { + "loss": 0.0, + "grad_norm": 0.0008024214766919613, + "learning_rate": 7.784999999999999e-07, + "num_tokens": 307434.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0684674382209778e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2245, + "step": 449 + }, + { + "loss": 0.0, + "grad_norm": 0.0013720437418669462, + "learning_rate": 7.78e-07, + "num_tokens": 308330.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 4.176422953605652e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.225, + "step": 450 + }, + { + "loss": 0.0, + "grad_norm": 0.0008150116773322225, + "learning_rate": 7.775e-07, + "num_tokens": 309226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.145821392536163e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2255, + "step": 451 + }, + { + "loss": 0.0, + "grad_norm": 0.42958030104637146, + "learning_rate": 7.77e-07, + "num_tokens": 310122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 1.4682300388813019e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.226, + "step": 452 + }, + { + "loss": 0.0, + "grad_norm": 0.0011029124725610018, + "learning_rate": 7.764999999999999e-07, + "num_tokens": 310488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.344061017036438e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2265, + "step": 453 + }, + { + "loss": 0.0, + "grad_norm": 0.0011241426691412926, + "learning_rate": 7.76e-07, + "num_tokens": 310854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.2280182242393494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.227, + "step": 454 + }, + { + "loss": 0.0, + "grad_norm": 0.8502638936042786, + "learning_rate": 7.754999999999999e-07, + "num_tokens": 311750.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 8.490029722452164e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2275, + "step": 455 + }, + { + "loss": 0.0, + "grad_norm": 0.0013144731055945158, + "learning_rate": 7.75e-07, + "num_tokens": 312646.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 3.39532271027565e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.228, + "step": 456 + }, + { + "loss": 0.0, + "grad_norm": 0.0009761439287103713, + "learning_rate": 7.745e-07, + "num_tokens": 313542.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 4.0193088352680206e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2285, + "step": 457 + }, + { + "loss": 0.0, + "grad_norm": 0.000928891240619123, + "learning_rate": 7.74e-07, + "num_tokens": 313908.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.352055162191391e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.229, + "step": 458 + }, + { + "loss": 0.0, + "grad_norm": 0.0011163371382281184, + "learning_rate": 7.734999999999999e-07, + "num_tokens": 314274.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.4972093999385834e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2295, + "step": 459 + }, + { + "loss": 0.0, + "grad_norm": 0.0007710496429353952, + "learning_rate": 7.729999999999999e-07, + "num_tokens": 315170.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 3.975536674261093e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.23, + "step": 460 + }, + { + "loss": 0.0, + "grad_norm": 0.0007348654326051474, + "learning_rate": 7.724999999999999e-07, + "num_tokens": 316066.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 2.86223366856575e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2305, + "step": 461 + }, + { + "loss": 0.0, + "grad_norm": 0.0006661872030235827, + "learning_rate": 7.72e-07, + "num_tokens": 316962.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.3562071621418e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.231, + "step": 462 + }, + { + "loss": 0.0, + "grad_norm": 0.0008995214593596756, + "learning_rate": 7.714999999999999e-07, + "num_tokens": 317328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9579736292362213e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2315, + "step": 463 + }, + { + "loss": 0.0, + "grad_norm": 0.00045315801980905235, + "learning_rate": 7.71e-07, + "num_tokens": 318224.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.7801299691200256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.232, + "step": 464 + }, + { + "loss": 0.0, + "grad_norm": 0.6928626894950867, + "learning_rate": 7.705e-07, + "num_tokens": 319120.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 3.6436133086681366e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2325, + "step": 465 + }, + { + "loss": 0.0, + "grad_norm": 0.0018925730837509036, + "learning_rate": 7.699999999999999e-07, + "num_tokens": 319486.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7309171855449677e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.233, + "step": 466 + }, + { + "loss": 0.0, + "grad_norm": 0.0006030919030308723, + "learning_rate": 7.694999999999999e-07, + "num_tokens": 319852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.2816471755504608e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2335, + "step": 467 + }, + { + "loss": 0.0, + "grad_norm": 0.0019683674909174442, + "learning_rate": 7.69e-07, + "num_tokens": 320748.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.710737943649292e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.234, + "step": 468 + }, + { + "loss": 0.0, + "grad_norm": 0.0006103675113990903, + "learning_rate": 7.684999999999999e-07, + "num_tokens": 321644.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 2.8799287974834442e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2345, + "step": 469 + }, + { + "loss": 0.0, + "grad_norm": 0.0023804621305316687, + "learning_rate": 7.68e-07, + "num_tokens": 322010.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.027573883533478e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.235, + "step": 470 + }, + { + "loss": 0.0, + "grad_norm": 0.0009048368665389717, + "learning_rate": 7.675e-07, + "num_tokens": 322376.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.2327137887477875e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2355, + "step": 471 + }, + { + "loss": 0.0, + "grad_norm": 0.0010861757909879088, + "learning_rate": 7.67e-07, + "num_tokens": 323272.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 4.105735570192337e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.236, + "step": 472 + }, + { + "loss": 0.0, + "grad_norm": 0.0025868702214211226, + "learning_rate": 7.664999999999999e-07, + "num_tokens": 323638.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.0113146901130676e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2365, + "step": 473 + }, + { + "loss": 0.0, + "grad_norm": 0.0010592455510050058, + "learning_rate": 7.66e-07, + "num_tokens": 324004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.581362009048462e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.237, + "step": 474 + }, + { + "loss": -0.0, + "grad_norm": 1.106165885925293, + "learning_rate": 7.654999999999999e-07, + "num_tokens": 324900.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 6.282981485128403e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2375, + "step": 475 + }, + { + "loss": 0.0, + "grad_norm": 0.00047323168837465346, + "learning_rate": 7.65e-07, + "num_tokens": 325796.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 2.4420209228992462e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.238, + "step": 476 + }, + { + "loss": 0.0, + "grad_norm": 0.0008561910362914205, + "learning_rate": 7.644999999999999e-07, + "num_tokens": 326162.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.239139914512634e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2385, + "step": 477 + }, + { + "loss": 0.0, + "grad_norm": 0.0020574661903083324, + "learning_rate": 7.64e-07, + "num_tokens": 326528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.563558518886566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.239, + "step": 478 + }, + { + "loss": 0.0, + "grad_norm": 0.0008511331398040056, + "learning_rate": 7.635e-07, + "num_tokens": 326894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.168731927871704e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2395, + "step": 479 + }, + { + "loss": 0.0001, + "grad_norm": 0.3131347894668579, + "learning_rate": 7.629999999999999e-07, + "num_tokens": 327790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.0019212700426578522, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.24, + "step": 480 + }, + { + "loss": 0.0, + "grad_norm": 0.0006524409982375801, + "learning_rate": 7.624999999999999e-07, + "num_tokens": 328156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3995526134967804e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2405, + "step": 481 + }, + { + "loss": 0.0, + "grad_norm": 0.0059391213580966, + "learning_rate": 7.62e-07, + "num_tokens": 328522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.2319297790527344e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.241, + "step": 482 + }, + { + "loss": 0.0, + "grad_norm": 0.0007000913028605282, + "learning_rate": 7.614999999999999e-07, + "num_tokens": 328888.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.287661820650101e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2415, + "step": 483 + }, + { + "loss": 0.0, + "grad_norm": 1.0497050285339355, + "learning_rate": 7.61e-07, + "num_tokens": 329784.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.231557250022888e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.242, + "step": 484 + }, + { + "loss": 0.0, + "grad_norm": 0.002384317573159933, + "learning_rate": 7.605e-07, + "num_tokens": 330150.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.9060447812080383e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2425, + "step": 485 + }, + { + "loss": 0.0, + "grad_norm": 0.0013909583212807775, + "learning_rate": 7.599999999999999e-07, + "num_tokens": 330516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.785694181919098e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.243, + "step": 486 + }, + { + "loss": 0.0, + "grad_norm": 0.0008498562383465469, + "learning_rate": 7.594999999999999e-07, + "num_tokens": 330882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4384818971157074e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2435, + "step": 487 + }, + { + "loss": 0.0, + "grad_norm": 0.9792348146438599, + "learning_rate": 7.59e-07, + "num_tokens": 331778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 7.939618080854416e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.244, + "step": 488 + }, + { + "loss": 0.0, + "grad_norm": 0.0009439431014470756, + "learning_rate": 7.584999999999999e-07, + "num_tokens": 332144.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.3556331396102905e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2445, + "step": 489 + }, + { + "loss": 0.0, + "grad_norm": 0.7939324975013733, + "learning_rate": 7.58e-07, + "num_tokens": 333040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 4.28222119808197e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.245, + "step": 490 + }, + { + "loss": 0.0, + "grad_norm": 0.0003945075150113553, + "learning_rate": 7.575e-07, + "num_tokens": 333936.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.732911914587021e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2455, + "step": 491 + }, + { + "loss": 0.0, + "grad_norm": 0.0014100059634074569, + "learning_rate": 7.57e-07, + "num_tokens": 334302.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.51747328042984e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.246, + "step": 492 + }, + { + "loss": 0.0, + "grad_norm": 0.9064180254936218, + "learning_rate": 7.564999999999999e-07, + "num_tokens": 335198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 5.394965410232544e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2465, + "step": 493 + }, + { + "loss": 0.0, + "grad_norm": 0.0009017913253046572, + "learning_rate": 7.559999999999999e-07, + "num_tokens": 335564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.33577224612236e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.247, + "step": 494 + }, + { + "loss": 0.0, + "grad_norm": 0.008774330839514732, + "learning_rate": 7.554999999999999e-07, + "num_tokens": 335930.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00010191276669502258, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2475, + "step": 495 + }, + { + "loss": 0.0, + "grad_norm": 0.0007485725800506771, + "learning_rate": 7.55e-07, + "num_tokens": 336296.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.93204391002655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.248, + "step": 496 + }, + { + "loss": -0.0, + "grad_norm": 0.7277558445930481, + "learning_rate": 7.544999999999999e-07, + "num_tokens": 337192.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8344999551773071, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8344999551773071, + "reward_std": 0.0007070977007970214, + "kl": 5.529914051294327e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2485, + "step": 497 + }, + { + "loss": 0.0, + "grad_norm": 1.97030508518219, + "learning_rate": 7.54e-07, + "num_tokens": 338088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 0.00012331828474998474, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.249, + "step": 498 + }, + { + "loss": 0.0, + "grad_norm": 0.0019033459248021245, + "learning_rate": 7.535e-07, + "num_tokens": 338454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.811158239841461e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2495, + "step": 499 + }, + { + "loss": 0.0, + "grad_norm": 0.0006422542501240969, + "learning_rate": 7.529999999999999e-07, + "num_tokens": 339350.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.1509826183319092e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.25, + "step": 500 + }, + { + "loss": 0.0, + "grad_norm": 0.9627796411514282, + "learning_rate": 7.524999999999999e-07, + "num_tokens": 340246.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 2.447608858346939e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2505, + "step": 501 + }, + { + "loss": 0.0, + "grad_norm": 0.000901131599675864, + "learning_rate": 7.52e-07, + "num_tokens": 340612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.061164170503616e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.251, + "step": 502 + }, + { + "loss": 0.0, + "grad_norm": 0.7200298309326172, + "learning_rate": 7.514999999999999e-07, + "num_tokens": 341508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 4.367716610431671e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2515, + "step": 503 + }, + { + "loss": 0.0, + "grad_norm": 0.002020574174821377, + "learning_rate": 7.51e-07, + "num_tokens": 342404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 5.6852586567401886e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.252, + "step": 504 + }, + { + "loss": 0.0, + "grad_norm": 0.0009755368810147047, + "learning_rate": 7.505e-07, + "num_tokens": 342770.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1616538763046265e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2525, + "step": 505 + }, + { + "loss": 0.0, + "grad_norm": 0.8925000429153442, + "learning_rate": 7.5e-07, + "num_tokens": 343666.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 9.544193744659424e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.253, + "step": 506 + }, + { + "loss": 0.0, + "grad_norm": 0.00094449712196365, + "learning_rate": 7.495e-07, + "num_tokens": 344032.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.762224853038788e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2535, + "step": 507 + }, + { + "loss": 0.0, + "grad_norm": 1.5173064470291138, + "learning_rate": 7.489999999999999e-07, + "num_tokens": 344928.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 7.414352148771286e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.254, + "step": 508 + }, + { + "loss": 0.0, + "grad_norm": 0.0008655313868075609, + "learning_rate": 7.485e-07, + "num_tokens": 345294.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.4428201615810394e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2545, + "step": 509 + }, + { + "loss": 0.0, + "grad_norm": 0.0009476901614107192, + "learning_rate": 7.48e-07, + "num_tokens": 345660.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9035454392433167e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.255, + "step": 510 + }, + { + "loss": 0.0, + "grad_norm": 1.5047985315322876, + "learning_rate": 7.475e-07, + "num_tokens": 346556.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 6.398884579539299e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2555, + "step": 511 + }, + { + "loss": -0.0, + "grad_norm": 1.2779611349105835, + "learning_rate": 7.47e-07, + "num_tokens": 347452.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 4.671793431043625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.256, + "step": 512 + }, + { + "loss": 0.0, + "grad_norm": 0.0025708882603794336, + "learning_rate": 7.465e-07, + "num_tokens": 347818.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.117819041013718e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2565, + "step": 513 + }, + { + "loss": 0.0, + "grad_norm": 0.0007069227285683155, + "learning_rate": 7.459999999999999e-07, + "num_tokens": 348184.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3818185329437256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.257, + "step": 514 + }, + { + "loss": 0.0, + "grad_norm": 0.9211877584457397, + "learning_rate": 7.455e-07, + "num_tokens": 349080.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6110000014305115, + "rewards/environment_reward_verifier/std": 0.32809752225875854, + "reward": 0.6110000014305115, + "reward_std": 0.32809752225875854, + "kl": 4.2280182242393494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2575, + "step": 515 + }, + { + "loss": 0.0, + "grad_norm": 0.0028202433604747057, + "learning_rate": 7.45e-07, + "num_tokens": 349976.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 5.090329796075821e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.258, + "step": 516 + }, + { + "loss": 0.0, + "grad_norm": 0.0010466987732797861, + "learning_rate": 7.445e-07, + "num_tokens": 350872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 4.4493936002254486e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2585, + "step": 517 + }, + { + "loss": 0.0, + "grad_norm": 0.0011290244292467833, + "learning_rate": 7.44e-07, + "num_tokens": 351238.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4223700165748596e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.259, + "step": 518 + }, + { + "loss": 0.0, + "grad_norm": 0.9691317081451416, + "learning_rate": 7.435000000000001e-07, + "num_tokens": 352134.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8350000381469727, + "rewards/environment_reward_verifier/std": 0.0014142375439405441, + "reward": 0.8350000381469727, + "reward_std": 0.0014142375439405441, + "kl": 0.00011391844600439072, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2595, + "step": 519 + }, + { + "loss": 0.0, + "grad_norm": 0.0011023505358025432, + "learning_rate": 7.429999999999999e-07, + "num_tokens": 352500.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.7062523663043976e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.26, + "step": 520 + }, + { + "loss": 0.0, + "grad_norm": 0.0012557971058413386, + "learning_rate": 7.425e-07, + "num_tokens": 353396.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 3.79001721739769e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2605, + "step": 521 + }, + { + "loss": 0.0, + "grad_norm": 0.001549424254335463, + "learning_rate": 7.42e-07, + "num_tokens": 353762.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.348771810531616e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.261, + "step": 522 + }, + { + "loss": 0.0, + "grad_norm": 0.7359144687652588, + "learning_rate": 7.415e-07, + "num_tokens": 354658.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.5052187740802765e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2615, + "step": 523 + }, + { + "loss": 0.0, + "grad_norm": 0.0008711325353942811, + "learning_rate": 7.41e-07, + "num_tokens": 355024.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.368314355611801e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.262, + "step": 524 + }, + { + "loss": 0.0, + "grad_norm": 0.0014574839733541012, + "learning_rate": 7.405e-07, + "num_tokens": 355920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 5.590170621871948e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2625, + "step": 525 + }, + { + "loss": 0.0, + "grad_norm": 0.0007790196686983109, + "learning_rate": 7.4e-07, + "num_tokens": 356816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.2617710530757904e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.263, + "step": 526 + }, + { + "loss": 0.0, + "grad_norm": 0.0012634535087272525, + "learning_rate": 7.395e-07, + "num_tokens": 357712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.7451816499233246e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2635, + "step": 527 + }, + { + "loss": 0.0, + "grad_norm": 0.8514025211334229, + "learning_rate": 7.389999999999999e-07, + "num_tokens": 358608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 3.659818321466446e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.264, + "step": 528 + }, + { + "loss": 0.0, + "grad_norm": 0.0017907796427607536, + "learning_rate": 7.385e-07, + "num_tokens": 358974.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8436072170734406e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2645, + "step": 529 + }, + { + "loss": 0.0, + "grad_norm": 0.0009088242659345269, + "learning_rate": 7.38e-07, + "num_tokens": 359340.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9717572033405304e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.265, + "step": 530 + }, + { + "loss": 0.0, + "grad_norm": 1.416846752166748, + "learning_rate": 7.375e-07, + "num_tokens": 360236.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8264999985694885, + "rewards/environment_reward_verifier/std": 0.012020787224173546, + "reward": 0.8264999985694885, + "reward_std": 0.012020787224173546, + "kl": 3.840494900941849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2655, + "step": 531 + }, + { + "loss": 0.0, + "grad_norm": 0.0013038903707638383, + "learning_rate": 7.37e-07, + "num_tokens": 360602.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.015917122364044e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.266, + "step": 532 + }, + { + "loss": 0.0, + "grad_norm": 0.0011814340250566602, + "learning_rate": 7.365e-07, + "num_tokens": 360968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.90554016828537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2665, + "step": 533 + }, + { + "loss": 0.0, + "grad_norm": 0.036372631788253784, + "learning_rate": 7.359999999999999e-07, + "num_tokens": 361864.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.00014512613415718079, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.267, + "step": 534 + }, + { + "loss": 0.0, + "grad_norm": 0.004396241623908281, + "learning_rate": 7.355e-07, + "num_tokens": 362230.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.8152171075344086e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2675, + "step": 535 + }, + { + "loss": 0.0, + "grad_norm": 0.0006165736122056842, + "learning_rate": 7.35e-07, + "num_tokens": 363126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 2.704653888940811e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.268, + "step": 536 + }, + { + "loss": 0.0, + "grad_norm": 0.000927309098187834, + "learning_rate": 7.345e-07, + "num_tokens": 363492.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.315415233373642e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2685, + "step": 537 + }, + { + "loss": 0.0, + "grad_norm": 0.00157637195661664, + "learning_rate": 7.34e-07, + "num_tokens": 364388.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.674214869737625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.269, + "step": 538 + }, + { + "loss": 0.0, + "grad_norm": 0.0015477711567655206, + "learning_rate": 7.335e-07, + "num_tokens": 364754.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.830568701028824e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2695, + "step": 539 + }, + { + "loss": 0.0, + "grad_norm": 1.1562288999557495, + "learning_rate": 7.329999999999999e-07, + "num_tokens": 365650.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.01909189112484455, + "reward": 0.8365000486373901, + "reward_std": 0.01909189112484455, + "kl": 2.844352275133133e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.27, + "step": 540 + }, + { + "loss": 0.0, + "grad_norm": 0.646880030632019, + "learning_rate": 7.325e-07, + "num_tokens": 366546.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 1.5391036868095398e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2705, + "step": 541 + }, + { + "loss": 0.0, + "grad_norm": 0.0017395936883985996, + "learning_rate": 7.319999999999999e-07, + "num_tokens": 367442.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 6.28521665930748e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.271, + "step": 542 + }, + { + "loss": 0.0, + "grad_norm": 0.0006721155950799584, + "learning_rate": 7.315e-07, + "num_tokens": 367808.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.583395689725876e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2715, + "step": 543 + }, + { + "loss": 0.0, + "grad_norm": 0.0009692271705716848, + "learning_rate": 7.31e-07, + "num_tokens": 368174.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9871629774570465e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.272, + "step": 544 + }, + { + "loss": 0.0, + "grad_norm": 0.0010545527329668403, + "learning_rate": 7.305e-07, + "num_tokens": 368540.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.037136048078537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2725, + "step": 545 + }, + { + "loss": 0.0, + "grad_norm": 0.0012554118875414133, + "learning_rate": 7.3e-07, + "num_tokens": 368906.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.573950380086899e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.273, + "step": 546 + }, + { + "loss": 0.0, + "grad_norm": 0.7156521677970886, + "learning_rate": 7.295e-07, + "num_tokens": 369802.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.4407712519168854e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2735, + "step": 547 + }, + { + "loss": 0.0, + "grad_norm": 0.0003729368036147207, + "learning_rate": 7.289999999999999e-07, + "num_tokens": 370168.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4538876712322235e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.274, + "step": 548 + }, + { + "loss": 0.0, + "grad_norm": 0.0016862640623003244, + "learning_rate": 7.285e-07, + "num_tokens": 370534.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.4197775423526764e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2745, + "step": 549 + }, + { + "loss": 0.0, + "grad_norm": 0.0007830922259017825, + "learning_rate": 7.28e-07, + "num_tokens": 371430.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 2.658367156982422e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.275, + "step": 550 + }, + { + "loss": 0.0, + "grad_norm": 0.0010923327645286918, + "learning_rate": 7.275e-07, + "num_tokens": 371796.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.927627742290497e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2755, + "step": 551 + }, + { + "loss": 0.0, + "grad_norm": 0.8142842054367065, + "learning_rate": 7.27e-07, + "num_tokens": 372692.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.250276833772659e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.276, + "step": 552 + }, + { + "loss": 0.0, + "grad_norm": 0.6860761642456055, + "learning_rate": 7.265000000000001e-07, + "num_tokens": 373588.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5734999775886536, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5734999775886536, + "reward_std": 0.27082186937332153, + "kl": 3.765430301427841e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2765, + "step": 553 + }, + { + "loss": 0.0, + "grad_norm": 0.0008581196889281273, + "learning_rate": 7.259999999999999e-07, + "num_tokens": 373954.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.81167808175087e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.277, + "step": 554 + }, + { + "loss": 0.0, + "grad_norm": 0.0011645841877907515, + "learning_rate": 7.255e-07, + "num_tokens": 374320.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8624199330806732e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2775, + "step": 555 + }, + { + "loss": 0.0, + "grad_norm": 2.9909136295318604, + "learning_rate": 7.249999999999999e-07, + "num_tokens": 375216.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 9.493250399827957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.278, + "step": 556 + }, + { + "loss": 0.0, + "grad_norm": 0.0014020655071362853, + "learning_rate": 7.245e-07, + "num_tokens": 376112.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.471559077501297e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2785, + "step": 557 + }, + { + "loss": 0.0, + "grad_norm": 0.0004894250887446105, + "learning_rate": 7.24e-07, + "num_tokens": 376478.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.7498619854450226e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.279, + "step": 558 + }, + { + "loss": 0.0, + "grad_norm": 0.0006631935248151422, + "learning_rate": 7.235e-07, + "num_tokens": 377374.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 3.2833777368068695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2795, + "step": 559 + }, + { + "loss": 0.0, + "grad_norm": 0.0011922323610633612, + "learning_rate": 7.229999999999999e-07, + "num_tokens": 377740.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.988722503185272e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.28, + "step": 560 + }, + { + "loss": 0.0, + "grad_norm": 0.7559614777565002, + "learning_rate": 7.225e-07, + "num_tokens": 378636.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8259999752044678, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.8259999752044678, + "reward_std": 0.01272792648524046, + "kl": 4.695635288953781e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2805, + "step": 561 + }, + { + "loss": -0.0, + "grad_norm": 0.7900487780570984, + "learning_rate": 7.219999999999999e-07, + "num_tokens": 379532.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.812999963760376, + "rewards/environment_reward_verifier/std": 0.009899493306875229, + "reward": 0.812999963760376, + "reward_std": 0.009899494238197803, + "kl": 3.7454068660736084e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.281, + "step": 562 + }, + { + "loss": 0.0, + "grad_norm": 0.0014660859014838934, + "learning_rate": 7.215e-07, + "num_tokens": 379898.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.8963894844055176e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2815, + "step": 563 + }, + { + "loss": 0.0, + "grad_norm": 1.0280815362930298, + "learning_rate": 7.21e-07, + "num_tokens": 380794.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 6.190314888954163e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.282, + "step": 564 + }, + { + "loss": 0.0001, + "grad_norm": 6.458773612976074, + "learning_rate": 7.205e-07, + "num_tokens": 381690.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 0.001496921293437481, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2825, + "step": 565 + }, + { + "loss": 0.0, + "grad_norm": 0.0010697654215618968, + "learning_rate": 7.2e-07, + "num_tokens": 382056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.735573798418045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.283, + "step": 566 + }, + { + "loss": 0.0, + "grad_norm": 0.8140199184417725, + "learning_rate": 7.195e-07, + "num_tokens": 382952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 3.6473385989665985e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2835, + "step": 567 + }, + { + "loss": 0.0, + "grad_norm": 0.6990031599998474, + "learning_rate": 7.189999999999999e-07, + "num_tokens": 383848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.972664803266525e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.284, + "step": 568 + }, + { + "loss": 0.0, + "grad_norm": 0.48030799627304077, + "learning_rate": 7.185e-07, + "num_tokens": 384744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 3.3359043300151825e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2845, + "step": 569 + }, + { + "loss": 0.0, + "grad_norm": 0.6752439141273499, + "learning_rate": 7.179999999999999e-07, + "num_tokens": 385640.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 2.0023435354232788e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.285, + "step": 570 + }, + { + "loss": 0.0, + "grad_norm": 0.005463989917188883, + "learning_rate": 7.175e-07, + "num_tokens": 386536.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8149999976158142, + "reward_std": 0.0, + "kl": 0.00011748820543289185, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2855, + "step": 571 + }, + { + "loss": 0.0, + "grad_norm": 0.0015461534494534135, + "learning_rate": 7.17e-07, + "num_tokens": 386902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5323592126369476e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.286, + "step": 572 + }, + { + "loss": 0.0, + "grad_norm": 0.8691689968109131, + "learning_rate": 7.165e-07, + "num_tokens": 387798.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 9.879283607006073e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2865, + "step": 573 + }, + { + "loss": 0.0, + "grad_norm": 0.9046115279197693, + "learning_rate": 7.159999999999999e-07, + "num_tokens": 388694.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.8303824365139008e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.287, + "step": 574 + }, + { + "loss": 0.0, + "grad_norm": 0.0012133732670918107, + "learning_rate": 7.155e-07, + "num_tokens": 389060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.523286432027817e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2875, + "step": 575 + }, + { + "loss": 0.0, + "grad_norm": 1.1806221008300781, + "learning_rate": 7.149999999999999e-07, + "num_tokens": 389956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 4.287436604499817e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.288, + "step": 576 + }, + { + "loss": 0.0, + "grad_norm": 0.6862530708312988, + "learning_rate": 7.145e-07, + "num_tokens": 390852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 2.5819987058639526e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2885, + "step": 577 + }, + { + "loss": 0.0, + "grad_norm": 0.0016118023777380586, + "learning_rate": 7.14e-07, + "num_tokens": 391218.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.8440881073474884e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.289, + "step": 578 + }, + { + "loss": 0.0, + "grad_norm": 0.0008948792237788439, + "learning_rate": 7.135e-07, + "num_tokens": 391584.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.9758008420467377e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2895, + "step": 579 + }, + { + "loss": 0.0, + "grad_norm": 0.0017725012730807066, + "learning_rate": 7.129999999999999e-07, + "num_tokens": 391950.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.52590936422348e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.29, + "step": 580 + }, + { + "loss": 0.0, + "grad_norm": 0.003398467553779483, + "learning_rate": 7.125e-07, + "num_tokens": 392316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.3013674914836884e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2905, + "step": 581 + }, + { + "loss": 0.0, + "grad_norm": 0.0011972826905548573, + "learning_rate": 7.119999999999999e-07, + "num_tokens": 392682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.47416678071022e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.291, + "step": 582 + }, + { + "loss": 0.0, + "grad_norm": 0.000996905378997326, + "learning_rate": 7.115e-07, + "num_tokens": 393048.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.768503665924072e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2915, + "step": 583 + }, + { + "loss": 0.0, + "grad_norm": 0.3965910077095032, + "learning_rate": 7.11e-07, + "num_tokens": 393944.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 1.6774050891399384e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.292, + "step": 584 + }, + { + "loss": 0.0, + "grad_norm": 1.1074873208999634, + "learning_rate": 7.105e-07, + "num_tokens": 394840.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.8788653910160065e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2925, + "step": 585 + }, + { + "loss": 0.0, + "grad_norm": 0.0007802587351761758, + "learning_rate": 7.1e-07, + "num_tokens": 395206.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.516022115945816e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.293, + "step": 586 + }, + { + "loss": 0.0, + "grad_norm": 0.0005516806268133223, + "learning_rate": 7.094999999999999e-07, + "num_tokens": 396102.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4449080228805542e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2935, + "step": 587 + }, + { + "loss": 0.0, + "grad_norm": 0.0013195326318964362, + "learning_rate": 7.089999999999999e-07, + "num_tokens": 396468.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.4308061003685e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.294, + "step": 588 + }, + { + "loss": 0.0, + "grad_norm": 0.0014623524621129036, + "learning_rate": 7.085e-07, + "num_tokens": 396834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.5030377805233e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2945, + "step": 589 + }, + { + "loss": 0.0, + "grad_norm": 0.0007937848567962646, + "learning_rate": 7.079999999999999e-07, + "num_tokens": 397730.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 3.699958324432373e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.295, + "step": 590 + }, + { + "loss": 0.0, + "grad_norm": 0.6660794019699097, + "learning_rate": 7.075e-07, + "num_tokens": 398626.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8174999952316284, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8174999952316284, + "reward_std": 0.014849262312054634, + "kl": 2.4378299713134766e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2955, + "step": 591 + }, + { + "loss": 0.0, + "grad_norm": 0.0011187827913090587, + "learning_rate": 7.07e-07, + "num_tokens": 398992.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.750009298324585e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.296, + "step": 592 + }, + { + "loss": 0.0, + "grad_norm": 0.0013909402769058943, + "learning_rate": 7.065e-07, + "num_tokens": 399358.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.801526665687561e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2965, + "step": 593 + }, + { + "loss": 0.0, + "grad_norm": 0.009479865431785583, + "learning_rate": 7.059999999999999e-07, + "num_tokens": 400254.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 0.00018437672406435013, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.297, + "step": 594 + }, + { + "loss": 0.0, + "grad_norm": 0.0006968002999201417, + "learning_rate": 7.055e-07, + "num_tokens": 400620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.683699131011963e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2975, + "step": 595 + }, + { + "loss": 0.0, + "grad_norm": 1.1247608661651611, + "learning_rate": 7.049999999999999e-07, + "num_tokens": 401516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.596395254135132e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.298, + "step": 596 + }, + { + "loss": -0.0, + "grad_norm": 0.7843502759933472, + "learning_rate": 7.045e-07, + "num_tokens": 402412.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7669999599456787, + "rewards/environment_reward_verifier/std": 0.00424262834712863, + "reward": 0.7669999599456787, + "reward_std": 0.00424262834712863, + "kl": 3.2738782465457916e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2985, + "step": 597 + }, + { + "loss": 0.0, + "grad_norm": 0.0007366478675976396, + "learning_rate": 7.04e-07, + "num_tokens": 402778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6930123567581177e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.299, + "step": 598 + }, + { + "loss": 0.0, + "grad_norm": 0.5876581072807312, + "learning_rate": 7.035e-07, + "num_tokens": 403674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.1344982087612152e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.2995, + "step": 599 + }, + { + "loss": 0.0, + "grad_norm": 2.7197017669677734, + "learning_rate": 7.029999999999999e-07, + "num_tokens": 404570.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.03111271932721138, + "reward": 0.828000009059906, + "reward_std": 0.03111271932721138, + "kl": 9.680353105068207e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3, + "step": 600 + }, + { + "loss": 0.0, + "grad_norm": 0.001130021526478231, + "learning_rate": 7.024999999999999e-07, + "num_tokens": 404936.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8620863556861877e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3005, + "step": 601 + }, + { + "loss": 0.0, + "grad_norm": 1.0326294898986816, + "learning_rate": 7.019999999999999e-07, + "num_tokens": 405832.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8370000123977661, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8370000123977661, + "reward_std": 0.0014141954015940428, + "kl": 8.158478885889053e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.301, + "step": 602 + }, + { + "loss": 0.0, + "grad_norm": 0.0007612873450852931, + "learning_rate": 7.015e-07, + "num_tokens": 406198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8013251721858978e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3015, + "step": 603 + }, + { + "loss": 0.0, + "grad_norm": 0.0015164915239438415, + "learning_rate": 7.009999999999999e-07, + "num_tokens": 406564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.440639168024063e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.302, + "step": 604 + }, + { + "loss": 0.0, + "grad_norm": 0.0012494310503825545, + "learning_rate": 7.005e-07, + "num_tokens": 407460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 5.6570395827293396e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3025, + "step": 605 + }, + { + "loss": -0.0, + "grad_norm": 0.7219941020011902, + "learning_rate": 7e-07, + "num_tokens": 408356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8344999551773071, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8344999551773071, + "reward_std": 0.0007070977007970214, + "kl": 3.477931022644043e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.303, + "step": 606 + }, + { + "loss": 0.0, + "grad_norm": 1.5845794677734375, + "learning_rate": 6.994999999999999e-07, + "num_tokens": 409252.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.00424262834712863, + "reward": 0.8170000314712524, + "reward_std": 0.00424262834712863, + "kl": 7.447786629199982e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3035, + "step": 607 + }, + { + "loss": 0.0, + "grad_norm": 1.1389849185943604, + "learning_rate": 6.989999999999999e-07, + "num_tokens": 410148.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 4.856474697589874e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.304, + "step": 608 + }, + { + "loss": 0.0, + "grad_norm": 2.9767954349517822, + "learning_rate": 6.985e-07, + "num_tokens": 411044.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6110000014305115, + "rewards/environment_reward_verifier/std": 0.32809752225875854, + "reward": 0.6110000014305115, + "reward_std": 0.32809752225875854, + "kl": 5.687400698661804e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3045, + "step": 609 + }, + { + "loss": 0.0, + "grad_norm": 0.0010801024036481977, + "learning_rate": 6.979999999999999e-07, + "num_tokens": 411410.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.324689507484436e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.305, + "step": 610 + }, + { + "loss": 0.0, + "grad_norm": 0.0011967119062319398, + "learning_rate": 6.975e-07, + "num_tokens": 412306.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.905687481164932e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3055, + "step": 611 + }, + { + "loss": 0.0, + "grad_norm": 0.0006793588981963694, + "learning_rate": 6.97e-07, + "num_tokens": 413202.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.5127472579479218e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.306, + "step": 612 + }, + { + "loss": 0.0, + "grad_norm": 0.0005013294867239892, + "learning_rate": 6.965e-07, + "num_tokens": 413568.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.882854849100113e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3065, + "step": 613 + }, + { + "loss": 0.0, + "grad_norm": 0.0007044204394333065, + "learning_rate": 6.959999999999999e-07, + "num_tokens": 413934.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5583431124687195e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.307, + "step": 614 + }, + { + "loss": 0.0, + "grad_norm": 0.000589247967582196, + "learning_rate": 6.955e-07, + "num_tokens": 414830.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 2.7990899980068207e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3075, + "step": 615 + }, + { + "loss": 0.0, + "grad_norm": 0.7483782768249512, + "learning_rate": 6.949999999999999e-07, + "num_tokens": 415726.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.3659860491752625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.308, + "step": 616 + }, + { + "loss": 0.0, + "grad_norm": 0.5555701851844788, + "learning_rate": 6.945e-07, + "num_tokens": 416622.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5744999647140503, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5744999647140503, + "reward_std": 0.27082186937332153, + "kl": 4.6846456825733185e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3085, + "step": 617 + }, + { + "loss": 0.0, + "grad_norm": 0.0049834963865578175, + "learning_rate": 6.939999999999999e-07, + "num_tokens": 416988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.719756543636322e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.309, + "step": 618 + }, + { + "loss": 0.0, + "grad_norm": 0.0017910569440573454, + "learning_rate": 6.935e-07, + "num_tokens": 417884.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 6.791949272155762e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3095, + "step": 619 + }, + { + "loss": 0.0, + "grad_norm": 0.004858257714658976, + "learning_rate": 6.929999999999999e-07, + "num_tokens": 418250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00011091213673353195, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.31, + "step": 620 + }, + { + "loss": 0.0, + "grad_norm": 0.75960373878479, + "learning_rate": 6.924999999999999e-07, + "num_tokens": 419146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 2.6852823793888092e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3105, + "step": 621 + }, + { + "loss": 0.0, + "grad_norm": 0.0010069460840895772, + "learning_rate": 6.919999999999999e-07, + "num_tokens": 419512.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.194863140583038e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.311, + "step": 622 + }, + { + "loss": 0.0, + "grad_norm": 0.008241693489253521, + "learning_rate": 6.915e-07, + "num_tokens": 419878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00017871428281068802, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3115, + "step": 623 + }, + { + "loss": 0.0, + "grad_norm": 3.8802902698516846, + "learning_rate": 6.909999999999999e-07, + "num_tokens": 420774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8339999914169312, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8339999914169312, + "reward_std": 0.0014141954015940428, + "kl": 3.557652235031128e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.312, + "step": 624 + }, + { + "loss": 0.0, + "grad_norm": 0.8549783825874329, + "learning_rate": 6.905e-07, + "num_tokens": 421670.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 6.370618939399719e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3125, + "step": 625 + }, + { + "loss": 0.0, + "grad_norm": 0.7835222482681274, + "learning_rate": 6.9e-07, + "num_tokens": 422566.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.9892660677433014e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.313, + "step": 626 + }, + { + "loss": 0.0, + "grad_norm": 0.6540793180465698, + "learning_rate": 6.894999999999999e-07, + "num_tokens": 423462.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 6.963033229112625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3135, + "step": 627 + }, + { + "loss": 0.0, + "grad_norm": 0.0005253406707197428, + "learning_rate": 6.889999999999999e-07, + "num_tokens": 423828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.8034130334854126e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.314, + "step": 628 + }, + { + "loss": 0.0, + "grad_norm": 0.0009612101130187511, + "learning_rate": 6.885e-07, + "num_tokens": 424194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.799237310886383e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3145, + "step": 629 + }, + { + "loss": 0.0, + "grad_norm": 0.0007504363311454654, + "learning_rate": 6.879999999999999e-07, + "num_tokens": 424560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4528242647647858e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.315, + "step": 630 + }, + { + "loss": 0.0, + "grad_norm": 0.0010777200805023313, + "learning_rate": 6.875e-07, + "num_tokens": 424926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.31831756234169e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3155, + "step": 631 + }, + { + "loss": 0.0, + "grad_norm": 0.001108592259697616, + "learning_rate": 6.87e-07, + "num_tokens": 425292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3447908461093903e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.316, + "step": 632 + }, + { + "loss": 0.0, + "grad_norm": 0.8040815591812134, + "learning_rate": 6.865e-07, + "num_tokens": 426188.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 2.512522041797638e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3165, + "step": 633 + }, + { + "loss": 0.0, + "grad_norm": 0.6935257911682129, + "learning_rate": 6.86e-07, + "num_tokens": 427084.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.0007071398431435227, + "reward": 0.8355000019073486, + "reward_std": 0.0007071398431435227, + "kl": 5.880650132894516e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.317, + "step": 634 + }, + { + "loss": 0.0, + "grad_norm": 0.0012401107233017683, + "learning_rate": 6.854999999999999e-07, + "num_tokens": 427450.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.05838543176651e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3175, + "step": 635 + }, + { + "loss": 0.0, + "grad_norm": 0.003047216683626175, + "learning_rate": 6.85e-07, + "num_tokens": 427816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.263501614332199e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.318, + "step": 636 + }, + { + "loss": 0.0, + "grad_norm": 0.0007127355202101171, + "learning_rate": 6.845e-07, + "num_tokens": 428182.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4394521713256836e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3185, + "step": 637 + }, + { + "loss": 0.0, + "grad_norm": 0.7168914079666138, + "learning_rate": 6.84e-07, + "num_tokens": 429078.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 3.9987266063690186e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.319, + "step": 638 + }, + { + "loss": 0.0, + "grad_norm": 0.0012631439603865147, + "learning_rate": 6.835e-07, + "num_tokens": 429444.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7933157980442047e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3195, + "step": 639 + }, + { + "loss": 0.0, + "grad_norm": 0.0010941632790490985, + "learning_rate": 6.830000000000001e-07, + "num_tokens": 429810.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.12454828619957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.32, + "step": 640 + }, + { + "loss": 0.0, + "grad_norm": 0.5629311800003052, + "learning_rate": 6.824999999999999e-07, + "num_tokens": 430706.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8370000123977661, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8370000123977661, + "reward_std": 0.0014141954015940428, + "kl": 2.9305927455425262e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3205, + "step": 641 + }, + { + "loss": 0.0, + "grad_norm": 0.0014564594021067023, + "learning_rate": 6.82e-07, + "num_tokens": 431602.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.473142325878143e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.321, + "step": 642 + }, + { + "loss": 0.0, + "grad_norm": 0.0008370128343813121, + "learning_rate": 6.815e-07, + "num_tokens": 431968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.746524453163147e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3215, + "step": 643 + }, + { + "loss": 0.0, + "grad_norm": 0.6197002530097961, + "learning_rate": 6.81e-07, + "num_tokens": 432864.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 2.3438595235347748e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.322, + "step": 644 + }, + { + "loss": 0.0, + "grad_norm": 0.0005567868938669562, + "learning_rate": 6.805e-07, + "num_tokens": 433230.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.808907836675644e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3225, + "step": 645 + }, + { + "loss": 0.0, + "grad_norm": 0.6040643453598022, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 434126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 2.449285238981247e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.323, + "step": 646 + }, + { + "loss": 0.0, + "grad_norm": 0.002252435078844428, + "learning_rate": 6.794999999999999e-07, + "num_tokens": 435022.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 7.445178925991058e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3235, + "step": 647 + }, + { + "loss": 0.0, + "grad_norm": 4.579550266265869, + "learning_rate": 6.79e-07, + "num_tokens": 435918.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 6.625894457101822e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.324, + "step": 648 + }, + { + "loss": 0.0, + "grad_norm": 0.0013744801981374621, + "learning_rate": 6.784999999999999e-07, + "num_tokens": 436814.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 5.259178578853607e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3245, + "step": 649 + }, + { + "loss": 0.0, + "grad_norm": 0.698723554611206, + "learning_rate": 6.78e-07, + "num_tokens": 437710.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.875838592648506e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.325, + "step": 650 + }, + { + "loss": 0.0, + "grad_norm": 0.0011548621114343405, + "learning_rate": 6.775e-07, + "num_tokens": 438076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.358682781457901e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3255, + "step": 651 + }, + { + "loss": 0.0, + "grad_norm": 0.0006847024778835475, + "learning_rate": 6.77e-07, + "num_tokens": 438972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.094559699296951e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.326, + "step": 652 + }, + { + "loss": 0.0, + "grad_norm": 0.0007354238186962903, + "learning_rate": 6.765e-07, + "num_tokens": 439338.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0337291061878204e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3265, + "step": 653 + }, + { + "loss": 0.0, + "grad_norm": 0.0010975906625390053, + "learning_rate": 6.76e-07, + "num_tokens": 439704.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.489440470933914e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.327, + "step": 654 + }, + { + "loss": 0.0, + "grad_norm": 0.0011954187648370862, + "learning_rate": 6.754999999999999e-07, + "num_tokens": 440070.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.033891648054123e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3275, + "step": 655 + }, + { + "loss": 0.0, + "grad_norm": 0.011588593944907188, + "learning_rate": 6.75e-07, + "num_tokens": 440966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00018292898312211037, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.328, + "step": 656 + }, + { + "loss": 0.0, + "grad_norm": 0.0006912227254360914, + "learning_rate": 6.745e-07, + "num_tokens": 441862.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.7865713238716125e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3285, + "step": 657 + }, + { + "loss": 0.0, + "grad_norm": 1.2161142826080322, + "learning_rate": 6.74e-07, + "num_tokens": 442758.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8144999742507935, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8144999742507935, + "reward_std": 0.0035355305299162865, + "kl": 9.529199451208115e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.329, + "step": 658 + }, + { + "loss": 0.0, + "grad_norm": 0.000648809946142137, + "learning_rate": 6.735e-07, + "num_tokens": 443124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.019813448190689e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3295, + "step": 659 + }, + { + "loss": -0.0, + "grad_norm": 0.6099978089332581, + "learning_rate": 6.730000000000001e-07, + "num_tokens": 444020.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 2.8732232749462128e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.33, + "step": 660 + }, + { + "loss": 0.0, + "grad_norm": 1.014809012413025, + "learning_rate": 6.724999999999999e-07, + "num_tokens": 444916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 8.21063295006752e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3305, + "step": 661 + }, + { + "loss": 0.0, + "grad_norm": 1.0332342386245728, + "learning_rate": 6.72e-07, + "num_tokens": 445812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8144999742507935, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8144999742507935, + "reward_std": 0.0035355305299162865, + "kl": 5.087442696094513e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.331, + "step": 662 + }, + { + "loss": 0.0, + "grad_norm": 0.9325398802757263, + "learning_rate": 6.714999999999999e-07, + "num_tokens": 446708.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 7.722713053226471e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3315, + "step": 663 + }, + { + "loss": 0.0, + "grad_norm": 1.077994465827942, + "learning_rate": 6.71e-07, + "num_tokens": 447604.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 8.442718535661697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.332, + "step": 664 + }, + { + "loss": 0.0, + "grad_norm": 0.30242636799812317, + "learning_rate": 6.705e-07, + "num_tokens": 448500.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5914999842643738, + "rewards/environment_reward_verifier/std": 0.3047630190849304, + "reward": 0.5914999842643738, + "reward_std": 0.3047630190849304, + "kl": 1.6080215573310852e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3325, + "step": 665 + }, + { + "loss": 0.0, + "grad_norm": 0.7816704511642456, + "learning_rate": 6.7e-07, + "num_tokens": 449396.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 5.314219743013382e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.333, + "step": 666 + }, + { + "loss": 0.0, + "grad_norm": 0.7801264524459839, + "learning_rate": 6.695e-07, + "num_tokens": 450292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 2.9692426323890686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3335, + "step": 667 + }, + { + "loss": 0.0, + "grad_norm": 0.0009613597649149597, + "learning_rate": 6.69e-07, + "num_tokens": 450658.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9587186872959137e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.334, + "step": 668 + }, + { + "loss": 0.0, + "grad_norm": 0.0008051811018958688, + "learning_rate": 6.684999999999999e-07, + "num_tokens": 451554.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 3.367289900779724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3345, + "step": 669 + }, + { + "loss": 0.0, + "grad_norm": 0.9789057970046997, + "learning_rate": 6.68e-07, + "num_tokens": 452450.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 7.941573858261108e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.335, + "step": 670 + }, + { + "loss": 0.0, + "grad_norm": 0.0009357063099741936, + "learning_rate": 6.675e-07, + "num_tokens": 452816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.7661753594875336e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3355, + "step": 671 + }, + { + "loss": 0.0, + "grad_norm": 0.8246026039123535, + "learning_rate": 6.67e-07, + "num_tokens": 453712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8114999532699585, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8114999532699585, + "reward_std": 0.06434673070907593, + "kl": 3.839656710624695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.336, + "step": 672 + }, + { + "loss": 0.0, + "grad_norm": 0.5829533338546753, + "learning_rate": 6.665e-07, + "num_tokens": 454608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.0007071398431435227, + "reward": 0.8355000019073486, + "reward_std": 0.0007071398431435227, + "kl": 4.0553510189056396e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3365, + "step": 673 + }, + { + "loss": 0.0, + "grad_norm": 0.7374504208564758, + "learning_rate": 6.66e-07, + "num_tokens": 455504.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 2.423301339149475e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.337, + "step": 674 + }, + { + "loss": 0.0, + "grad_norm": 1.2778427600860596, + "learning_rate": 6.654999999999999e-07, + "num_tokens": 456400.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 7.122103124856949e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3375, + "step": 675 + }, + { + "loss": 0.0, + "grad_norm": 0.0014428014401346445, + "learning_rate": 6.65e-07, + "num_tokens": 457296.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 4.827417433261871e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.338, + "step": 676 + }, + { + "loss": 0.0, + "grad_norm": 0.6748918890953064, + "learning_rate": 6.645e-07, + "num_tokens": 458192.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7875000238418579, + "rewards/environment_reward_verifier/std": 0.05020460858941078, + "reward": 0.7875000238418579, + "reward_std": 0.05020460858941078, + "kl": 2.82973051071167e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3385, + "step": 677 + }, + { + "loss": 0.0, + "grad_norm": 0.0010371003299951553, + "learning_rate": 6.64e-07, + "num_tokens": 459088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.760494291782379e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.339, + "step": 678 + }, + { + "loss": 0.0, + "grad_norm": 0.0008279599715024233, + "learning_rate": 6.635e-07, + "num_tokens": 459454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.543387770652771e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3395, + "step": 679 + }, + { + "loss": 0.0, + "grad_norm": 0.0004288914205972105, + "learning_rate": 6.63e-07, + "num_tokens": 459820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.6702339053153992e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.34, + "step": 680 + }, + { + "loss": 0.0, + "grad_norm": 0.0035996404476463795, + "learning_rate": 6.624999999999999e-07, + "num_tokens": 460716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.754005491733551e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3405, + "step": 681 + }, + { + "loss": 0.0, + "grad_norm": 0.0006002707523293793, + "learning_rate": 6.62e-07, + "num_tokens": 461612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 3.461819142103195e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.341, + "step": 682 + }, + { + "loss": 0.0, + "grad_norm": 0.7093996405601501, + "learning_rate": 6.614999999999999e-07, + "num_tokens": 462508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.346280962228775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3415, + "step": 683 + }, + { + "loss": 0.0, + "grad_norm": 0.0025844546034932137, + "learning_rate": 6.61e-07, + "num_tokens": 462874.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.116499960422516e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.342, + "step": 684 + }, + { + "loss": 0.0, + "grad_norm": 0.0011869438458234072, + "learning_rate": 6.605e-07, + "num_tokens": 463770.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 4.194118082523346e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3425, + "step": 685 + }, + { + "loss": 0.0, + "grad_norm": 0.9997851252555847, + "learning_rate": 6.6e-07, + "num_tokens": 464666.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 5.1662325859069824e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.343, + "step": 686 + }, + { + "loss": 0.0, + "grad_norm": 0.6725564002990723, + "learning_rate": 6.595e-07, + "num_tokens": 465562.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.0244158804416656e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3435, + "step": 687 + }, + { + "loss": 0.0, + "grad_norm": 0.6846553683280945, + "learning_rate": 6.59e-07, + "num_tokens": 466458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7860000133514404, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7860000133514404, + "reward_std": 0.04808327555656433, + "kl": 2.7189962565898895e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.344, + "step": 688 + }, + { + "loss": 0.0, + "grad_norm": 0.6613869667053223, + "learning_rate": 6.584999999999999e-07, + "num_tokens": 467354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.7700945287942886e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3445, + "step": 689 + }, + { + "loss": 0.0, + "grad_norm": 0.001505712396465242, + "learning_rate": 6.58e-07, + "num_tokens": 468250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 4.06438484787941e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.345, + "step": 690 + }, + { + "loss": 0.0, + "grad_norm": 0.0004417377058416605, + "learning_rate": 6.575e-07, + "num_tokens": 468616.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.115785360336304e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3455, + "step": 691 + }, + { + "loss": 0.0, + "grad_norm": 0.0016008485108613968, + "learning_rate": 6.57e-07, + "num_tokens": 468982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.507973790168762e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.346, + "step": 692 + }, + { + "loss": 0.0, + "grad_norm": 0.6884562373161316, + "learning_rate": 6.565e-07, + "num_tokens": 469878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6024999618530273, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6024999618530273, + "reward_std": 0.32031938433647156, + "kl": 2.653617411851883e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3465, + "step": 693 + }, + { + "loss": 0.0, + "grad_norm": 0.0010921740904450417, + "learning_rate": 6.56e-07, + "num_tokens": 470244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8137117624282837e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.347, + "step": 694 + }, + { + "loss": 0.0, + "grad_norm": 0.6846423745155334, + "learning_rate": 6.554999999999999e-07, + "num_tokens": 471140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 3.712344914674759e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3475, + "step": 695 + }, + { + "loss": 0.0, + "grad_norm": 0.0036911554634571075, + "learning_rate": 6.55e-07, + "num_tokens": 472036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.1732716858387e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.348, + "step": 696 + }, + { + "loss": 0.0, + "grad_norm": 0.0006061898893676698, + "learning_rate": 6.544999999999999e-07, + "num_tokens": 472932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8159999847412109, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8159999847412109, + "reward_std": 0.0, + "kl": 2.7766451239585876e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3485, + "step": 697 + }, + { + "loss": 0.0, + "grad_norm": 0.002090150723233819, + "learning_rate": 6.54e-07, + "num_tokens": 473828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.992447793483734e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.349, + "step": 698 + }, + { + "loss": 0.0, + "grad_norm": 1.531058430671692, + "learning_rate": 6.535e-07, + "num_tokens": 474724.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 5.740951746702194e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3495, + "step": 699 + }, + { + "loss": 0.0, + "grad_norm": 0.5353614091873169, + "learning_rate": 6.53e-07, + "num_tokens": 475620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.08909548819065094, + "reward": 0.8149999976158142, + "reward_std": 0.08909548819065094, + "kl": 2.967100590467453e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.35, + "step": 700 + }, + { + "loss": 0.0, + "grad_norm": 0.0006890299846418202, + "learning_rate": 6.524999999999999e-07, + "num_tokens": 476516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6377849280834198e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3505, + "step": 701 + }, + { + "loss": 0.0, + "grad_norm": 0.0011575064854696393, + "learning_rate": 6.52e-07, + "num_tokens": 476882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6336871087551117e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.351, + "step": 702 + }, + { + "loss": 0.0, + "grad_norm": 1.0071227550506592, + "learning_rate": 6.514999999999999e-07, + "num_tokens": 477778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 5.2426010370254517e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3515, + "step": 703 + }, + { + "loss": -0.0, + "grad_norm": 0.6260432600975037, + "learning_rate": 6.51e-07, + "num_tokens": 478674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 3.0035153031349182e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.352, + "step": 704 + }, + { + "loss": 0.0, + "grad_norm": 0.0009116759756579995, + "learning_rate": 6.505e-07, + "num_tokens": 479570.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.060380160808563e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3525, + "step": 705 + }, + { + "loss": 0.0, + "grad_norm": 0.0030497321859002113, + "learning_rate": 6.5e-07, + "num_tokens": 479936.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.0684047639369965e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.353, + "step": 706 + }, + { + "loss": 0.0, + "grad_norm": 0.0006430986686609685, + "learning_rate": 6.495e-07, + "num_tokens": 480832.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 3.116205334663391e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3535, + "step": 707 + }, + { + "loss": 0.0, + "grad_norm": 1.0158851146697998, + "learning_rate": 6.49e-07, + "num_tokens": 481728.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 5.7221390306949615e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.354, + "step": 708 + }, + { + "loss": 0.0, + "grad_norm": 0.8351655006408691, + "learning_rate": 6.484999999999999e-07, + "num_tokens": 482624.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8454999923706055, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8454999923706055, + "reward_std": 0.014849262312054634, + "kl": 3.985455259680748e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3545, + "step": 709 + }, + { + "loss": 0.0, + "grad_norm": 0.002636699238792062, + "learning_rate": 6.48e-07, + "num_tokens": 482990.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.9441511034965515e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.355, + "step": 710 + }, + { + "loss": 0.0, + "grad_norm": 0.0011992601212114096, + "learning_rate": 6.474999999999999e-07, + "num_tokens": 483886.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.492606967687607e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3555, + "step": 711 + }, + { + "loss": 0.0, + "grad_norm": 0.0006801988347433507, + "learning_rate": 6.47e-07, + "num_tokens": 484782.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 2.647656947374344e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.356, + "step": 712 + }, + { + "loss": 0.0, + "grad_norm": 0.0006278291693888605, + "learning_rate": 6.465e-07, + "num_tokens": 485148.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.96151265501976e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3565, + "step": 713 + }, + { + "loss": 0.0, + "grad_norm": 0.02269609458744526, + "learning_rate": 6.46e-07, + "num_tokens": 486044.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00012513156980276108, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.357, + "step": 714 + }, + { + "loss": 0.0, + "grad_norm": 1.2117421627044678, + "learning_rate": 6.454999999999999e-07, + "num_tokens": 486940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8264999985694885, + "rewards/environment_reward_verifier/std": 0.004949725698679686, + "reward": 0.8264999985694885, + "reward_std": 0.004949725698679686, + "kl": 8.92365351319313e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3575, + "step": 715 + }, + { + "loss": 0.0, + "grad_norm": 0.8121581673622131, + "learning_rate": 6.45e-07, + "num_tokens": 487836.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 3.440864384174347e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.358, + "step": 716 + }, + { + "loss": 0.0, + "grad_norm": 0.0007526807021349669, + "learning_rate": 6.444999999999999e-07, + "num_tokens": 488732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.0493363738059998e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3585, + "step": 717 + }, + { + "loss": 0.0, + "grad_norm": 0.0011233491823077202, + "learning_rate": 6.44e-07, + "num_tokens": 489098.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.566965460777283e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.359, + "step": 718 + }, + { + "loss": 0.0, + "grad_norm": 0.9603006839752197, + "learning_rate": 6.435e-07, + "num_tokens": 489994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 4.37488779425621e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3595, + "step": 719 + }, + { + "loss": 0.0, + "grad_norm": 0.0019995439797639847, + "learning_rate": 6.43e-07, + "num_tokens": 490890.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 2.917274832725525e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.36, + "step": 720 + }, + { + "loss": 0.0, + "grad_norm": 0.8033301830291748, + "learning_rate": 6.424999999999999e-07, + "num_tokens": 491786.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 2.3120082914829254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3605, + "step": 721 + }, + { + "loss": 0.0, + "grad_norm": 0.0010354184778407216, + "learning_rate": 6.42e-07, + "num_tokens": 492152.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.347732126712799e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.361, + "step": 722 + }, + { + "loss": 0.0, + "grad_norm": 0.002867473755031824, + "learning_rate": 6.414999999999999e-07, + "num_tokens": 493048.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.4817646741867065e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3615, + "step": 723 + }, + { + "loss": 0.0, + "grad_norm": 0.0009290321613661945, + "learning_rate": 6.41e-07, + "num_tokens": 493414.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.566911280155182e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.362, + "step": 724 + }, + { + "loss": 0.0, + "grad_norm": 0.0007650686893612146, + "learning_rate": 6.404999999999999e-07, + "num_tokens": 493780.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.9818544387817383e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3625, + "step": 725 + }, + { + "loss": 0.0, + "grad_norm": 0.6412078738212585, + "learning_rate": 6.4e-07, + "num_tokens": 494676.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8790000081062317, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8790000081062317, + "reward_std": 0.0014141954015940428, + "kl": 3.480538725852966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.363, + "step": 726 + }, + { + "loss": 0.0, + "grad_norm": 0.7075743079185486, + "learning_rate": 6.395e-07, + "num_tokens": 495572.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 2.76053324341774e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3635, + "step": 727 + }, + { + "loss": 0.0, + "grad_norm": 0.00047449395060539246, + "learning_rate": 6.389999999999999e-07, + "num_tokens": 495938.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.3587996363639832e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.364, + "step": 728 + }, + { + "loss": 0.0, + "grad_norm": 1.2251524925231934, + "learning_rate": 6.384999999999999e-07, + "num_tokens": 496834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7994999885559082, + "reward_std": 0.04879037290811539, + "kl": 3.720726817846298e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3645, + "step": 729 + }, + { + "loss": 0.0, + "grad_norm": 0.7717981934547424, + "learning_rate": 6.38e-07, + "num_tokens": 497730.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.5860575735569e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.365, + "step": 730 + }, + { + "loss": 0.0, + "grad_norm": 0.9186346530914307, + "learning_rate": 6.374999999999999e-07, + "num_tokens": 498626.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.0021212929859757423, + "reward": 0.8335000276565552, + "reward_std": 0.0021212929859757423, + "kl": 6.904173642396927e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3655, + "step": 731 + }, + { + "loss": 0.0, + "grad_norm": 0.84583979845047, + "learning_rate": 6.37e-07, + "num_tokens": 499522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.0543265640735626e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.366, + "step": 732 + }, + { + "loss": 0.0, + "grad_norm": 0.0004621327097993344, + "learning_rate": 6.365e-07, + "num_tokens": 499888.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.1827796697616577e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3665, + "step": 733 + }, + { + "loss": 0.0, + "grad_norm": 0.00255565345287323, + "learning_rate": 6.36e-07, + "num_tokens": 500254.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.13299959897995e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.367, + "step": 734 + }, + { + "loss": 0.0, + "grad_norm": 0.000824491202365607, + "learning_rate": 6.354999999999999e-07, + "num_tokens": 500620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.7968700528144836e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3675, + "step": 735 + }, + { + "loss": 0.0, + "grad_norm": 0.0008618003339506686, + "learning_rate": 6.35e-07, + "num_tokens": 501516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 2.316851168870926e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.368, + "step": 736 + }, + { + "loss": 0.0, + "grad_norm": 0.6351233720779419, + "learning_rate": 6.344999999999999e-07, + "num_tokens": 502412.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 4.462525248527527e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3685, + "step": 737 + }, + { + "loss": 0.0, + "grad_norm": 0.8174920678138733, + "learning_rate": 6.34e-07, + "num_tokens": 503308.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 7.361825555562973e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.369, + "step": 738 + }, + { + "loss": 0.0, + "grad_norm": 0.0008763825171627104, + "learning_rate": 6.335e-07, + "num_tokens": 503674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.976747393608093e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3695, + "step": 739 + }, + { + "loss": 0.0, + "grad_norm": 0.0007347882492467761, + "learning_rate": 6.33e-07, + "num_tokens": 504040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9280781745910645e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.37, + "step": 740 + }, + { + "loss": 0.0, + "grad_norm": 0.0013616685755550861, + "learning_rate": 6.324999999999999e-07, + "num_tokens": 504406.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.791002720594406e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3705, + "step": 741 + }, + { + "loss": 0.0, + "grad_norm": 0.5727549195289612, + "learning_rate": 6.319999999999999e-07, + "num_tokens": 505302.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.479250103235245e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.371, + "step": 742 + }, + { + "loss": 0.0, + "grad_norm": 0.0005594661342911422, + "learning_rate": 6.314999999999999e-07, + "num_tokens": 505668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.248026430606842e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3715, + "step": 743 + }, + { + "loss": 0.0, + "grad_norm": 0.0012528691440820694, + "learning_rate": 6.31e-07, + "num_tokens": 506034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.9058737456798553e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.372, + "step": 744 + }, + { + "loss": 0.0, + "grad_norm": 0.000664975494146347, + "learning_rate": 6.304999999999999e-07, + "num_tokens": 506400.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.109034150838852e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3725, + "step": 745 + }, + { + "loss": 0.0, + "grad_norm": 5.891997814178467, + "learning_rate": 6.3e-07, + "num_tokens": 507296.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.0005017649382352829, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.373, + "step": 746 + }, + { + "loss": 0.0, + "grad_norm": 0.0009146234951913357, + "learning_rate": 6.295e-07, + "num_tokens": 507662.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.234444350004196e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3735, + "step": 747 + }, + { + "loss": 0.0, + "grad_norm": 0.0008638282888568938, + "learning_rate": 6.289999999999999e-07, + "num_tokens": 508028.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.175996243953705e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.374, + "step": 748 + }, + { + "loss": 0.0, + "grad_norm": 0.9354413151741028, + "learning_rate": 6.284999999999999e-07, + "num_tokens": 508924.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 5.358457565307617e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3745, + "step": 749 + }, + { + "loss": 0.0, + "grad_norm": 0.8698471784591675, + "learning_rate": 6.28e-07, + "num_tokens": 509820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.928970545530319e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.375, + "step": 750 + }, + { + "loss": 0.0, + "grad_norm": 0.6731522679328918, + "learning_rate": 6.274999999999999e-07, + "num_tokens": 510716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.055154334753751755, + "reward": 0.8389999866485596, + "reward_std": 0.055154334753751755, + "kl": 3.010593354701996e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3755, + "step": 751 + }, + { + "loss": 0.0, + "grad_norm": 0.0010692239739000797, + "learning_rate": 6.27e-07, + "num_tokens": 511082.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.608370363712311e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.376, + "step": 752 + }, + { + "loss": 0.0, + "grad_norm": 0.004261866211891174, + "learning_rate": 6.265e-07, + "num_tokens": 511448.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2502616047859192e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3765, + "step": 753 + }, + { + "loss": 0.0, + "grad_norm": 0.618039608001709, + "learning_rate": 6.26e-07, + "num_tokens": 512344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 2.420227974653244e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.377, + "step": 754 + }, + { + "loss": 0.0, + "grad_norm": 0.0010167269501835108, + "learning_rate": 6.254999999999999e-07, + "num_tokens": 512710.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.890918403863907e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3775, + "step": 755 + }, + { + "loss": 0.0, + "grad_norm": 0.0025685280561447144, + "learning_rate": 6.249999999999999e-07, + "num_tokens": 513076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.952361971139908e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.378, + "step": 756 + }, + { + "loss": 0.0, + "grad_norm": 0.0007701526628807187, + "learning_rate": 6.245e-07, + "num_tokens": 513442.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9436702132225037e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3785, + "step": 757 + }, + { + "loss": 0.0, + "grad_norm": 0.0014547390164807439, + "learning_rate": 6.24e-07, + "num_tokens": 514338.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.708565443754196e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.379, + "step": 758 + }, + { + "loss": 0.0, + "grad_norm": 0.0010569763835519552, + "learning_rate": 6.235e-07, + "num_tokens": 514704.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9928982257843018e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3795, + "step": 759 + }, + { + "loss": 0.0, + "grad_norm": 0.0009250293951481581, + "learning_rate": 6.23e-07, + "num_tokens": 515600.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 3.913603723049164e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.38, + "step": 760 + }, + { + "loss": 0.0, + "grad_norm": 0.0012653374578803778, + "learning_rate": 6.225000000000001e-07, + "num_tokens": 515966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.828294575214386e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3805, + "step": 761 + }, + { + "loss": 0.0, + "grad_norm": 0.0010828955564647913, + "learning_rate": 6.219999999999999e-07, + "num_tokens": 516332.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.467647522687912e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.381, + "step": 762 + }, + { + "loss": 0.0, + "grad_norm": 0.002116474788635969, + "learning_rate": 6.215e-07, + "num_tokens": 516698.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.189725637435913e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3815, + "step": 763 + }, + { + "loss": 0.0, + "grad_norm": 0.8476846814155579, + "learning_rate": 6.21e-07, + "num_tokens": 517594.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.846500039100647, + "rewards/environment_reward_verifier/std": 0.014849219471216202, + "reward": 0.846500039100647, + "reward_std": 0.014849220402538776, + "kl": 4.07882034778595e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.382, + "step": 764 + }, + { + "loss": 0.0, + "grad_norm": 0.0011961472919210792, + "learning_rate": 6.205e-07, + "num_tokens": 517960.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.249850124120712e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3825, + "step": 765 + }, + { + "loss": 0.0, + "grad_norm": 0.7129542231559753, + "learning_rate": 6.2e-07, + "num_tokens": 518856.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 8.251797407865524e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.383, + "step": 766 + }, + { + "loss": 0.0, + "grad_norm": 0.7722144722938538, + "learning_rate": 6.195000000000001e-07, + "num_tokens": 519752.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.004407674074173e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3835, + "step": 767 + }, + { + "loss": 0.0, + "grad_norm": 0.0015368679305538535, + "learning_rate": 6.189999999999999e-07, + "num_tokens": 520648.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.238464266061783e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.384, + "step": 768 + }, + { + "loss": 0.0, + "grad_norm": 0.7801802754402161, + "learning_rate": 6.185e-07, + "num_tokens": 521544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 5.952734500169754e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3845, + "step": 769 + }, + { + "loss": 0.0, + "grad_norm": 0.0008700647740624845, + "learning_rate": 6.18e-07, + "num_tokens": 521910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.245007246732712e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.385, + "step": 770 + }, + { + "loss": 0.0, + "grad_norm": 0.9259238839149475, + "learning_rate": 6.175e-07, + "num_tokens": 522806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 3.273133188486099e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3855, + "step": 771 + }, + { + "loss": 0.0, + "grad_norm": 0.0014969698386266828, + "learning_rate": 6.17e-07, + "num_tokens": 523172.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5686807930469513e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.386, + "step": 772 + }, + { + "loss": 0.0, + "grad_norm": 0.006186207756400108, + "learning_rate": 6.165e-07, + "num_tokens": 523538.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.09570774435997e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3865, + "step": 773 + }, + { + "loss": 0.0, + "grad_norm": 1.1589457988739014, + "learning_rate": 6.16e-07, + "num_tokens": 524434.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.8149999976158142, + "reward_std": 0.011313731782138348, + "kl": 4.557054489850998e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.387, + "step": 774 + }, + { + "loss": 0.0, + "grad_norm": 0.0005518601974472404, + "learning_rate": 6.155e-07, + "num_tokens": 524800.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.692360430955887e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3875, + "step": 775 + }, + { + "loss": 0.0, + "grad_norm": 0.001120497123338282, + "learning_rate": 6.149999999999999e-07, + "num_tokens": 525166.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9140693843364716e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.388, + "step": 776 + }, + { + "loss": 0.0, + "grad_norm": 0.7982441782951355, + "learning_rate": 6.145e-07, + "num_tokens": 526062.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.4784508645534515e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3885, + "step": 777 + }, + { + "loss": 0.0, + "grad_norm": 0.0027774127665907145, + "learning_rate": 6.14e-07, + "num_tokens": 526958.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.057244122028351e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.389, + "step": 778 + }, + { + "loss": 0.0, + "grad_norm": 0.0011340905912220478, + "learning_rate": 6.135e-07, + "num_tokens": 527324.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.678180605173111e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3895, + "step": 779 + }, + { + "loss": 0.0, + "grad_norm": 0.0006853631930425763, + "learning_rate": 6.13e-07, + "num_tokens": 527690.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7861446142196655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.39, + "step": 780 + }, + { + "loss": 0.0, + "grad_norm": 0.009597169235348701, + "learning_rate": 6.125000000000001e-07, + "num_tokens": 528056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00019149668514728546, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3905, + "step": 781 + }, + { + "loss": 0.0, + "grad_norm": 0.004018091131001711, + "learning_rate": 6.119999999999999e-07, + "num_tokens": 528952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00010970886796712875, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.391, + "step": 782 + }, + { + "loss": 0.0, + "grad_norm": 1.126266360282898, + "learning_rate": 6.115e-07, + "num_tokens": 529848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 5.193334072828293e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3915, + "step": 783 + }, + { + "loss": -0.0, + "grad_norm": 0.9128333330154419, + "learning_rate": 6.11e-07, + "num_tokens": 530744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 2.9579736292362213e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.392, + "step": 784 + }, + { + "loss": 0.0, + "grad_norm": 0.0008193780086003244, + "learning_rate": 6.105e-07, + "num_tokens": 531110.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7962028980255127e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3925, + "step": 785 + }, + { + "loss": 0.0, + "grad_norm": 0.7476780414581299, + "learning_rate": 6.1e-07, + "num_tokens": 532006.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.246272146701813e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.393, + "step": 786 + }, + { + "loss": 0.0, + "grad_norm": 0.0006282931426540017, + "learning_rate": 6.095e-07, + "num_tokens": 532372.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3266300559043884e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3935, + "step": 787 + }, + { + "loss": 0.0, + "grad_norm": 1.8928757905960083, + "learning_rate": 6.089999999999999e-07, + "num_tokens": 533268.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 7.044710218906403e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.394, + "step": 788 + }, + { + "loss": 0.0, + "grad_norm": 0.506048858165741, + "learning_rate": 6.085e-07, + "num_tokens": 534164.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.570838063955307e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3945, + "step": 789 + }, + { + "loss": 0.0, + "grad_norm": 0.9309393763542175, + "learning_rate": 6.079999999999999e-07, + "num_tokens": 535060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 6.131362169981003e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.395, + "step": 790 + }, + { + "loss": 0.0, + "grad_norm": 0.0010613016784191132, + "learning_rate": 6.075e-07, + "num_tokens": 535426.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.676116466522217e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3955, + "step": 791 + }, + { + "loss": 0.0, + "grad_norm": 1.1940882205963135, + "learning_rate": 6.07e-07, + "num_tokens": 536322.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5734999775886536, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5734999775886536, + "reward_std": 0.27082186937332153, + "kl": 7.629208266735077e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.396, + "step": 792 + }, + { + "loss": 0.0, + "grad_norm": 0.001403618953190744, + "learning_rate": 6.065e-07, + "num_tokens": 537218.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 4.445761442184448e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3965, + "step": 793 + }, + { + "loss": 0.0, + "grad_norm": 0.0009353617206215858, + "learning_rate": 6.06e-07, + "num_tokens": 537584.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.106387495994568e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.397, + "step": 794 + }, + { + "loss": 0.0, + "grad_norm": 0.0005145937902852893, + "learning_rate": 6.055e-07, + "num_tokens": 537950.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8003938496112823e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3975, + "step": 795 + }, + { + "loss": 0.0, + "grad_norm": 0.0008968059555627406, + "learning_rate": 6.049999999999999e-07, + "num_tokens": 538846.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8149999976158142, + "reward_std": 0.0, + "kl": 5.541834980249405e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.398, + "step": 796 + }, + { + "loss": 0.0, + "grad_norm": 0.0011200441513210535, + "learning_rate": 6.045e-07, + "num_tokens": 539212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.7895126044750214e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3985, + "step": 797 + }, + { + "loss": 0.0, + "grad_norm": 0.002243278082460165, + "learning_rate": 6.04e-07, + "num_tokens": 540108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 6.118416786193848e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.399, + "step": 798 + }, + { + "loss": 0.0, + "grad_norm": 0.0012119788443669677, + "learning_rate": 6.035e-07, + "num_tokens": 541004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.752244472503662e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.3995, + "step": 799 + }, + { + "loss": 0.0, + "grad_norm": 0.0011967993341386318, + "learning_rate": 6.03e-07, + "num_tokens": 541370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7150847017765045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4, + "step": 800 + }, + { + "loss": 0.0, + "grad_norm": 0.001629934529773891, + "learning_rate": 6.025000000000001e-07, + "num_tokens": 542266.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.935411900281906e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4005, + "step": 801 + }, + { + "loss": 0.0, + "grad_norm": 0.8221452236175537, + "learning_rate": 6.019999999999999e-07, + "num_tokens": 543162.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 7.931981235742569e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.401, + "step": 802 + }, + { + "loss": 0.0, + "grad_norm": 0.007462856359779835, + "learning_rate": 6.015e-07, + "num_tokens": 543528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.3334981203079224e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4015, + "step": 803 + }, + { + "loss": 0.0, + "grad_norm": 0.001739903469569981, + "learning_rate": 6.009999999999999e-07, + "num_tokens": 543894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.858190029859543e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.402, + "step": 804 + }, + { + "loss": 0.0, + "grad_norm": 0.5326638221740723, + "learning_rate": 6.005e-07, + "num_tokens": 544790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8400000333786011, + "rewards/environment_reward_verifier/std": 0.056568533182144165, + "reward": 0.8400000333786011, + "reward_std": 0.056568533182144165, + "kl": 1.197773963212967e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4025, + "step": 805 + }, + { + "loss": 0.0, + "grad_norm": 0.001234200200997293, + "learning_rate": 6e-07, + "num_tokens": 545156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.440639168024063e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.403, + "step": 806 + }, + { + "loss": 0.0, + "grad_norm": 0.0015355065697804093, + "learning_rate": 5.995e-07, + "num_tokens": 545522.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.369858652353287e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4035, + "step": 807 + }, + { + "loss": 0.0, + "grad_norm": 0.0006882250891067088, + "learning_rate": 5.989999999999999e-07, + "num_tokens": 545888.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6108697056770325e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.404, + "step": 808 + }, + { + "loss": 0.0, + "grad_norm": 4.64975643157959, + "learning_rate": 5.985e-07, + "num_tokens": 546784.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 8.086487650871277e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4045, + "step": 809 + }, + { + "loss": 0.0, + "grad_norm": 0.0008724891813471913, + "learning_rate": 5.979999999999999e-07, + "num_tokens": 547150.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3602118492126465e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.405, + "step": 810 + }, + { + "loss": 0.0, + "grad_norm": 0.4123207628726959, + "learning_rate": 5.975e-07, + "num_tokens": 548046.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 1.1555850505828857e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4055, + "step": 811 + }, + { + "loss": 0.0, + "grad_norm": 0.8788225054740906, + "learning_rate": 5.97e-07, + "num_tokens": 548942.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.427080810070038e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.406, + "step": 812 + }, + { + "loss": 0.0, + "grad_norm": 0.000729935010895133, + "learning_rate": 5.965e-07, + "num_tokens": 549308.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.465769648551941e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4065, + "step": 813 + }, + { + "loss": 0.0, + "grad_norm": 0.0005977301043458283, + "learning_rate": 5.96e-07, + "num_tokens": 549674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.3939104974269867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.407, + "step": 814 + }, + { + "loss": 0.0, + "grad_norm": 0.0006024898029863834, + "learning_rate": 5.955e-07, + "num_tokens": 550040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8741004168987274e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4075, + "step": 815 + }, + { + "loss": 0.0, + "grad_norm": 0.6240323185920715, + "learning_rate": 5.949999999999999e-07, + "num_tokens": 550936.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.796999990940094, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.796999990940094, + "reward_std": 0.01272792648524046, + "kl": 2.526957541704178e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.408, + "step": 816 + }, + { + "loss": 0.0, + "grad_norm": 0.0010339779546484351, + "learning_rate": 5.945e-07, + "num_tokens": 551302.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.389563739299774e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4085, + "step": 817 + }, + { + "loss": 0.0, + "grad_norm": 0.001581298653036356, + "learning_rate": 5.939999999999999e-07, + "num_tokens": 551668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.8718957006931305e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.409, + "step": 818 + }, + { + "loss": 0.0, + "grad_norm": 0.0028730963822454214, + "learning_rate": 5.935e-07, + "num_tokens": 552564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.765507325530052e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4095, + "step": 819 + }, + { + "loss": 0.0, + "grad_norm": 0.5237371921539307, + "learning_rate": 5.93e-07, + "num_tokens": 553460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7975000143051147, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.7975000143051147, + "reward_std": 0.06434673070907593, + "kl": 4.1239894926548004e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.41, + "step": 820 + }, + { + "loss": 0.0, + "grad_norm": 0.22981564700603485, + "learning_rate": 5.925e-07, + "num_tokens": 554356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 8.274801075458527e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4105, + "step": 821 + }, + { + "loss": 0.0, + "grad_norm": 0.000864826375618577, + "learning_rate": 5.919999999999999e-07, + "num_tokens": 554722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.267584204673767e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.411, + "step": 822 + }, + { + "loss": 0.0, + "grad_norm": 0.0005777585902251303, + "learning_rate": 5.915e-07, + "num_tokens": 555618.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 3.0573923140764236e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4115, + "step": 823 + }, + { + "loss": 0.0, + "grad_norm": 0.0007653327193111181, + "learning_rate": 5.909999999999999e-07, + "num_tokens": 555984.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.0934268832206726e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.412, + "step": 824 + }, + { + "loss": 0.0, + "grad_norm": 0.0008081765263341367, + "learning_rate": 5.905e-07, + "num_tokens": 556350.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.2024458050727844e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4125, + "step": 825 + }, + { + "loss": 0.0, + "grad_norm": 0.0008603125461377203, + "learning_rate": 5.9e-07, + "num_tokens": 556716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.314949572086334e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.413, + "step": 826 + }, + { + "loss": 0.0, + "grad_norm": 0.6024312973022461, + "learning_rate": 5.895e-07, + "num_tokens": 557612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.1016767024993896e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4135, + "step": 827 + }, + { + "loss": 0.0, + "grad_norm": 0.9248777627944946, + "learning_rate": 5.89e-07, + "num_tokens": 558508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.024041658267378807, + "reward": 0.8059999942779541, + "reward_std": 0.024041658267378807, + "kl": 3.932788968086243e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.414, + "step": 828 + }, + { + "loss": 0.0, + "grad_norm": 0.0024738821666687727, + "learning_rate": 5.885e-07, + "num_tokens": 559404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 5.822349339723587e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4145, + "step": 829 + }, + { + "loss": -0.0, + "grad_norm": 0.48234227299690247, + "learning_rate": 5.879999999999999e-07, + "num_tokens": 560300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 1.576356589794159e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.415, + "step": 830 + }, + { + "loss": 0.0, + "grad_norm": 0.0009319159435108304, + "learning_rate": 5.875e-07, + "num_tokens": 561196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.444969817996025e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4155, + "step": 831 + }, + { + "loss": 0.0, + "grad_norm": 0.0010825677309185266, + "learning_rate": 5.87e-07, + "num_tokens": 562092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.0588900446891785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.416, + "step": 832 + }, + { + "loss": 0.0, + "grad_norm": 0.5465240478515625, + "learning_rate": 5.865e-07, + "num_tokens": 562988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 6.101001054048538e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4165, + "step": 833 + }, + { + "loss": 0.0, + "grad_norm": 0.8875114321708679, + "learning_rate": 5.86e-07, + "num_tokens": 563884.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 6.432924419641495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.417, + "step": 834 + }, + { + "loss": 0.0, + "grad_norm": 0.6885401010513306, + "learning_rate": 5.854999999999999e-07, + "num_tokens": 564780.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.6242959797382355e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4175, + "step": 835 + }, + { + "loss": 0.0, + "grad_norm": 0.006994555704295635, + "learning_rate": 5.849999999999999e-07, + "num_tokens": 565146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00016637705266475677, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.418, + "step": 836 + }, + { + "loss": 0.0, + "grad_norm": 0.0013478395994752645, + "learning_rate": 5.845e-07, + "num_tokens": 565512.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7138739824295044e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4185, + "step": 837 + }, + { + "loss": 0.0, + "grad_norm": 0.005000046454370022, + "learning_rate": 5.839999999999999e-07, + "num_tokens": 565878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.910266190767288e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.419, + "step": 838 + }, + { + "loss": 0.0, + "grad_norm": 1.3202613592147827, + "learning_rate": 5.835e-07, + "num_tokens": 566774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 4.958640784025192e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4195, + "step": 839 + }, + { + "loss": 0.0, + "grad_norm": 0.004527856130152941, + "learning_rate": 5.83e-07, + "num_tokens": 567670.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 9.60715115070343e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.42, + "step": 840 + }, + { + "loss": 0.0, + "grad_norm": 0.0012674469035118818, + "learning_rate": 5.825e-07, + "num_tokens": 568036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.963018000125885e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4205, + "step": 841 + }, + { + "loss": 0.0, + "grad_norm": 0.979890763759613, + "learning_rate": 5.819999999999999e-07, + "num_tokens": 568932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8450000286102295, + "rewards/environment_reward_verifier/std": 0.014142164029181004, + "reward": 0.8450000286102295, + "reward_std": 0.014142164029181004, + "kl": 5.4290518164634705e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.421, + "step": 842 + }, + { + "loss": 0.0, + "grad_norm": 0.002009020186960697, + "learning_rate": 5.815e-07, + "num_tokens": 569298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.473142325878143e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4215, + "step": 843 + }, + { + "loss": 0.0, + "grad_norm": 0.000959740427788347, + "learning_rate": 5.809999999999999e-07, + "num_tokens": 569664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.216524004936218e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.422, + "step": 844 + }, + { + "loss": 0.0, + "grad_norm": 0.0007338738651014864, + "learning_rate": 5.805e-07, + "num_tokens": 570030.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.0549243092536926e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4225, + "step": 845 + }, + { + "loss": 0.0, + "grad_norm": 0.0010351468808948994, + "learning_rate": 5.8e-07, + "num_tokens": 570926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 3.2665207982063293e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.423, + "step": 846 + }, + { + "loss": 0.0, + "grad_norm": 2.825543165206909, + "learning_rate": 5.795e-07, + "num_tokens": 571822.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.039597976952791214, + "reward": 0.8500000238418579, + "reward_std": 0.039597976952791214, + "kl": 6.438978016376495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4235, + "step": 847 + }, + { + "loss": 0.0, + "grad_norm": 0.0006451636436395347, + "learning_rate": 5.79e-07, + "num_tokens": 572718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.265535295009613e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.424, + "step": 848 + }, + { + "loss": 0.0, + "grad_norm": 0.7045238018035889, + "learning_rate": 5.784999999999999e-07, + "num_tokens": 573614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.598459392786026e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4245, + "step": 849 + }, + { + "loss": 0.0, + "grad_norm": 0.0010145172709599137, + "learning_rate": 5.779999999999999e-07, + "num_tokens": 573980.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.5431083738803864e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.425, + "step": 850 + }, + { + "loss": 0.0, + "grad_norm": 0.0021720363292843103, + "learning_rate": 5.775e-07, + "num_tokens": 574346.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.1764619052410126e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4255, + "step": 851 + }, + { + "loss": 0.0, + "grad_norm": 0.5564368963241577, + "learning_rate": 5.769999999999999e-07, + "num_tokens": 575242.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 3.677885979413986e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.426, + "step": 852 + }, + { + "loss": 0.0, + "grad_norm": 0.6709645986557007, + "learning_rate": 5.765e-07, + "num_tokens": 576138.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5989999771118164, + "rewards/environment_reward_verifier/std": 0.30971279740333557, + "reward": 0.5989999771118164, + "reward_std": 0.30971279740333557, + "kl": 3.970880061388016e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4265, + "step": 853 + }, + { + "loss": 0.0, + "grad_norm": 0.8509161472320557, + "learning_rate": 5.76e-07, + "num_tokens": 577034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 7.42059201002121e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.427, + "step": 854 + }, + { + "loss": 0.0, + "grad_norm": 0.9860825538635254, + "learning_rate": 5.755e-07, + "num_tokens": 577930.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8285000324249268, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8285000324249268, + "reward_std": 0.030405621975660324, + "kl": 6.154272705316544e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4275, + "step": 855 + }, + { + "loss": 0.0, + "grad_norm": 0.0008337794570252299, + "learning_rate": 5.749999999999999e-07, + "num_tokens": 578296.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.50000336766243e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.428, + "step": 856 + }, + { + "loss": 0.0, + "grad_norm": 0.8874496221542358, + "learning_rate": 5.745e-07, + "num_tokens": 579192.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8050000071525574, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.8050000071525574, + "reward_std": 0.01272792648524046, + "kl": 5.4119154810905457e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4285, + "step": 857 + }, + { + "loss": 0.0, + "grad_norm": 0.4810936152935028, + "learning_rate": 5.739999999999999e-07, + "num_tokens": 580088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.1266750991344452e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.429, + "step": 858 + }, + { + "loss": 0.0, + "grad_norm": 0.000799552770331502, + "learning_rate": 5.735e-07, + "num_tokens": 580454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.109406679868698e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4295, + "step": 859 + }, + { + "loss": 0.0, + "grad_norm": 0.001031473628245294, + "learning_rate": 5.73e-07, + "num_tokens": 580820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3907050490379333e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.43, + "step": 860 + }, + { + "loss": 0.0, + "grad_norm": 0.7290229201316833, + "learning_rate": 5.725e-07, + "num_tokens": 581716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.884119749069214e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4305, + "step": 861 + }, + { + "loss": 0.0, + "grad_norm": 0.0011147563345730305, + "learning_rate": 5.719999999999999e-07, + "num_tokens": 582082.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.047900438308716e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.431, + "step": 862 + }, + { + "loss": 0.0, + "grad_norm": 0.0013581543462350965, + "learning_rate": 5.715e-07, + "num_tokens": 582978.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 4.9899332225322723e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4315, + "step": 863 + }, + { + "loss": 0.0, + "grad_norm": 0.9787481427192688, + "learning_rate": 5.709999999999999e-07, + "num_tokens": 583874.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.582518547773361e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.432, + "step": 864 + }, + { + "loss": 0.0, + "grad_norm": 0.002675174968317151, + "learning_rate": 5.705e-07, + "num_tokens": 584770.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 5.698762834072113e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4325, + "step": 865 + }, + { + "loss": 0.0, + "grad_norm": 0.0007517149788327515, + "learning_rate": 5.699999999999999e-07, + "num_tokens": 585666.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 3.350060433149338e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.433, + "step": 866 + }, + { + "loss": 0.0, + "grad_norm": 0.0011958049144595861, + "learning_rate": 5.695e-07, + "num_tokens": 586032.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.591699689626694e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4335, + "step": 867 + }, + { + "loss": 0.0, + "grad_norm": 0.0009895452531054616, + "learning_rate": 5.69e-07, + "num_tokens": 586928.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.904663026332855e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.434, + "step": 868 + }, + { + "loss": 0.0, + "grad_norm": 1.3839372396469116, + "learning_rate": 5.684999999999999e-07, + "num_tokens": 587824.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.609499990940094, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.609499990940094, + "reward_std": 0.32031938433647156, + "kl": 7.07460567355156e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4345, + "step": 869 + }, + { + "loss": 0.0, + "grad_norm": 0.0007765606278553605, + "learning_rate": 5.679999999999999e-07, + "num_tokens": 588720.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.7239322662353516e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.435, + "step": 870 + }, + { + "loss": 0.0, + "grad_norm": 0.0011798151535913348, + "learning_rate": 5.675e-07, + "num_tokens": 589086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7165748178958893e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4355, + "step": 871 + }, + { + "loss": 0.0, + "grad_norm": 0.6472865343093872, + "learning_rate": 5.669999999999999e-07, + "num_tokens": 589982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.387965261936188e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.436, + "step": 872 + }, + { + "loss": 0.0, + "grad_norm": 0.7618951797485352, + "learning_rate": 5.665e-07, + "num_tokens": 590878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.90797683596611e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4365, + "step": 873 + }, + { + "loss": 0.0, + "grad_norm": 0.0013739175628870726, + "learning_rate": 5.66e-07, + "num_tokens": 591244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.353917807340622e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.437, + "step": 874 + }, + { + "loss": 0.0, + "grad_norm": 0.8317199945449829, + "learning_rate": 5.655e-07, + "num_tokens": 592140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.7659890949726105e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4375, + "step": 875 + }, + { + "loss": 0.0, + "grad_norm": 0.7165759801864624, + "learning_rate": 5.649999999999999e-07, + "num_tokens": 593036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8100000023841858, + "rewards/environment_reward_verifier/std": 0.014142122119665146, + "reward": 0.8100000023841858, + "reward_std": 0.014142122119665146, + "kl": 3.2602809369564056e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.438, + "step": 876 + }, + { + "loss": 0.0, + "grad_norm": 0.012723397463560104, + "learning_rate": 5.645e-07, + "num_tokens": 593932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8429999947547913, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8429999947547913, + "reward_std": 0.0, + "kl": 5.6617893278598785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4385, + "step": 877 + }, + { + "loss": -0.0, + "grad_norm": 0.776158332824707, + "learning_rate": 5.639999999999999e-07, + "num_tokens": 594828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 3.9394013583660126e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.439, + "step": 878 + }, + { + "loss": 0.0, + "grad_norm": 0.0008882369729690254, + "learning_rate": 5.635e-07, + "num_tokens": 595194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5136396288871765e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4395, + "step": 879 + }, + { + "loss": 0.0, + "grad_norm": 2.4940199851989746, + "learning_rate": 5.629999999999999e-07, + "num_tokens": 596090.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.8199999928474426, + "reward_std": 0.011313731782138348, + "kl": 0.0009514158591628075, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.44, + "step": 880 + }, + { + "loss": 0.0, + "grad_norm": 0.9574906826019287, + "learning_rate": 5.625e-07, + "num_tokens": 596986.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 5.468260496854782e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4405, + "step": 881 + }, + { + "loss": 0.0, + "grad_norm": 0.001270653447136283, + "learning_rate": 5.620000000000001e-07, + "num_tokens": 597882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.908163100481033e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.441, + "step": 882 + }, + { + "loss": 0.0, + "grad_norm": 0.9686869978904724, + "learning_rate": 5.614999999999999e-07, + "num_tokens": 598778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 9.389035403728485e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4415, + "step": 883 + }, + { + "loss": 0.0, + "grad_norm": 0.0009024463943205774, + "learning_rate": 5.61e-07, + "num_tokens": 599144.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.2508356273174286e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.442, + "step": 884 + }, + { + "loss": 0.0, + "grad_norm": 0.0011521761771291494, + "learning_rate": 5.605e-07, + "num_tokens": 600040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 4.3111853301525116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4425, + "step": 885 + }, + { + "loss": 0.0, + "grad_norm": 0.0008811916341073811, + "learning_rate": 5.6e-07, + "num_tokens": 600406.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8091872334480286e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.443, + "step": 886 + }, + { + "loss": 0.0, + "grad_norm": 0.0005357464542612433, + "learning_rate": 5.595e-07, + "num_tokens": 600772.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.8646009266376495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4435, + "step": 887 + }, + { + "loss": 0.0, + "grad_norm": 0.0012236462207511067, + "learning_rate": 5.590000000000001e-07, + "num_tokens": 601668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.382999986410141, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.382999986410141, + "reward_std": 0.0, + "kl": 3.3863820135593414e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.444, + "step": 888 + }, + { + "loss": 0.0, + "grad_norm": 0.0015359098324552178, + "learning_rate": 5.584999999999999e-07, + "num_tokens": 602564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 7.446110248565674e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4445, + "step": 889 + }, + { + "loss": 0.0, + "grad_norm": 0.7075293660163879, + "learning_rate": 5.58e-07, + "num_tokens": 603460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 2.532079815864563e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.445, + "step": 890 + }, + { + "loss": 0.0, + "grad_norm": 0.6647194027900696, + "learning_rate": 5.575e-07, + "num_tokens": 604356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 2.183765172958374e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4455, + "step": 891 + }, + { + "loss": 0.0, + "grad_norm": 0.0005753295263275504, + "learning_rate": 5.57e-07, + "num_tokens": 604722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.3801269233226776e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.446, + "step": 892 + }, + { + "loss": 0.0, + "grad_norm": 0.0006327761220745742, + "learning_rate": 5.565e-07, + "num_tokens": 605088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9845163226127625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4465, + "step": 893 + }, + { + "loss": 0.0, + "grad_norm": 1.0625728368759155, + "learning_rate": 5.560000000000001e-07, + "num_tokens": 605984.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 2.457946538925171e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.447, + "step": 894 + }, + { + "loss": 0.0, + "grad_norm": 0.0012178801698610187, + "learning_rate": 5.555e-07, + "num_tokens": 606880.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.2179209887981415e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4475, + "step": 895 + }, + { + "loss": 0.0, + "grad_norm": 0.002682629507035017, + "learning_rate": 5.55e-07, + "num_tokens": 607776.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 4.859268665313721e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.448, + "step": 896 + }, + { + "loss": 0.0, + "grad_norm": 0.45517367124557495, + "learning_rate": 5.544999999999999e-07, + "num_tokens": 608672.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5924999713897705, + "rewards/environment_reward_verifier/std": 0.3019345998764038, + "reward": 0.5924999713897705, + "reward_std": 0.3019345700740814, + "kl": 1.2828037142753601e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4485, + "step": 897 + }, + { + "loss": 0.0, + "grad_norm": 0.000905574590433389, + "learning_rate": 5.54e-07, + "num_tokens": 609038.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.830902278423309e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.449, + "step": 898 + }, + { + "loss": 0.0, + "grad_norm": 2.8212804794311523, + "learning_rate": 5.535e-07, + "num_tokens": 609934.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 0.0011572809889912605, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4495, + "step": 899 + }, + { + "loss": 0.0, + "grad_norm": 0.000676330178976059, + "learning_rate": 5.53e-07, + "num_tokens": 610830.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 2.8536655008792877e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.45, + "step": 900 + }, + { + "loss": 0.0, + "grad_norm": 0.0011877953074872494, + "learning_rate": 5.525e-07, + "num_tokens": 611196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3439526557922363e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4505, + "step": 901 + }, + { + "loss": 0.0, + "grad_norm": 0.0007618311792612076, + "learning_rate": 5.520000000000001e-07, + "num_tokens": 611562.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4904886484146118e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.451, + "step": 902 + }, + { + "loss": 0.0, + "grad_norm": 0.0006666177650913596, + "learning_rate": 5.514999999999999e-07, + "num_tokens": 611928.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9773451387882233e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4515, + "step": 903 + }, + { + "loss": 0.0, + "grad_norm": 0.002373509109020233, + "learning_rate": 5.51e-07, + "num_tokens": 612824.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.090756505727768e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.452, + "step": 904 + }, + { + "loss": 0.0, + "grad_norm": 0.0008277193992398679, + "learning_rate": 5.505e-07, + "num_tokens": 613720.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.119984805583954e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4525, + "step": 905 + }, + { + "loss": 0.0, + "grad_norm": 0.0009345367434434593, + "learning_rate": 5.5e-07, + "num_tokens": 614086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3725442588329315e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.453, + "step": 906 + }, + { + "loss": 0.0, + "grad_norm": 1.4221453666687012, + "learning_rate": 5.495e-07, + "num_tokens": 614982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 0.00010339450091123581, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4535, + "step": 907 + }, + { + "loss": 0.0, + "grad_norm": 0.000370870839105919, + "learning_rate": 5.490000000000001e-07, + "num_tokens": 615878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 1.245737075805664e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.454, + "step": 908 + }, + { + "loss": 0.0, + "grad_norm": 0.78106290102005, + "learning_rate": 5.484999999999999e-07, + "num_tokens": 616774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 3.344472497701645e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4545, + "step": 909 + }, + { + "loss": 0.0, + "grad_norm": 0.0025292513892054558, + "learning_rate": 5.48e-07, + "num_tokens": 617140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.578009247779846e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.455, + "step": 910 + }, + { + "loss": 0.0, + "grad_norm": 0.0011718255700543523, + "learning_rate": 5.474999999999999e-07, + "num_tokens": 617506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.5919401347637177e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4555, + "step": 911 + }, + { + "loss": 0.0, + "grad_norm": 1.2116985321044922, + "learning_rate": 5.47e-07, + "num_tokens": 618402.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 7.627252489328384e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.456, + "step": 912 + }, + { + "loss": 0.0, + "grad_norm": 1.1670100688934326, + "learning_rate": 5.465e-07, + "num_tokens": 619298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 6.155204027891159e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4565, + "step": 913 + }, + { + "loss": 0.0, + "grad_norm": 0.656712532043457, + "learning_rate": 5.46e-07, + "num_tokens": 620194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.2359192371368408e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.457, + "step": 914 + }, + { + "loss": 0.0, + "grad_norm": 0.8736714124679565, + "learning_rate": 5.455e-07, + "num_tokens": 621090.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 3.801286220550537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4575, + "step": 915 + }, + { + "loss": 0.0, + "grad_norm": 0.7588840126991272, + "learning_rate": 5.45e-07, + "num_tokens": 621986.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 4.564691334962845e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.458, + "step": 916 + }, + { + "loss": 0.0, + "grad_norm": 0.0008407433633692563, + "learning_rate": 5.444999999999999e-07, + "num_tokens": 622882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.4014304876327515e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4585, + "step": 917 + }, + { + "loss": 0.0, + "grad_norm": 0.5819631218910217, + "learning_rate": 5.44e-07, + "num_tokens": 623778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 3.1919218599796295e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.459, + "step": 918 + }, + { + "loss": 0.0, + "grad_norm": 0.5659723281860352, + "learning_rate": 5.435e-07, + "num_tokens": 624674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 5.887821316719055e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4595, + "step": 919 + }, + { + "loss": 0.0, + "grad_norm": 0.001182614709250629, + "learning_rate": 5.43e-07, + "num_tokens": 625040.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.116911441087723e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.46, + "step": 920 + }, + { + "loss": 0.0, + "grad_norm": 1.0874000787734985, + "learning_rate": 5.425e-07, + "num_tokens": 625936.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 4.7031790018081665e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4605, + "step": 921 + }, + { + "loss": 0.0, + "grad_norm": 0.7091130018234253, + "learning_rate": 5.420000000000001e-07, + "num_tokens": 626832.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.444124013185501e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.461, + "step": 922 + }, + { + "loss": 0.0, + "grad_norm": 0.0008175342227332294, + "learning_rate": 5.414999999999999e-07, + "num_tokens": 627198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.716442734003067e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4615, + "step": 923 + }, + { + "loss": 0.0, + "grad_norm": 0.0007053024601191282, + "learning_rate": 5.41e-07, + "num_tokens": 627564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.289617598056793e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.462, + "step": 924 + }, + { + "loss": 0.0, + "grad_norm": 0.003715792205184698, + "learning_rate": 5.405e-07, + "num_tokens": 627930.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.268693298101425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4625, + "step": 925 + }, + { + "loss": 0.0, + "grad_norm": 0.0013841136824339628, + "learning_rate": 5.4e-07, + "num_tokens": 628826.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.133116453886032e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.463, + "step": 926 + }, + { + "loss": 0.0, + "grad_norm": 0.3961053192615509, + "learning_rate": 5.395e-07, + "num_tokens": 629722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7944999933242798, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7944999933242798, + "reward_std": 0.0502045676112175, + "kl": 9.655952453613281e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4635, + "step": 927 + }, + { + "loss": 0.0, + "grad_norm": 0.0015052658272907138, + "learning_rate": 5.39e-07, + "num_tokens": 630088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.967341035604477e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.464, + "step": 928 + }, + { + "loss": 0.0, + "grad_norm": 0.00031154241878539324, + "learning_rate": 5.384999999999999e-07, + "num_tokens": 630454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.813345968723297e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4645, + "step": 929 + }, + { + "loss": 0.0, + "grad_norm": 0.0005336882313713431, + "learning_rate": 5.38e-07, + "num_tokens": 630820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.9521452486515045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.465, + "step": 930 + }, + { + "loss": 0.0, + "grad_norm": 0.0018927346682175994, + "learning_rate": 5.374999999999999e-07, + "num_tokens": 631716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 6.585754454135895e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4655, + "step": 931 + }, + { + "loss": 0.0, + "grad_norm": 1.0327850580215454, + "learning_rate": 5.37e-07, + "num_tokens": 632612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.026162952184677124, + "reward": 0.8365000486373901, + "reward_std": 0.026162952184677124, + "kl": 5.525583401322365e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.466, + "step": 932 + }, + { + "loss": 0.0, + "grad_norm": 0.0016987278359010816, + "learning_rate": 5.365e-07, + "num_tokens": 632978.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.136205047369003e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4665, + "step": 933 + }, + { + "loss": 0.0, + "grad_norm": 0.0009261802188120782, + "learning_rate": 5.36e-07, + "num_tokens": 633344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.399886190891266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.467, + "step": 934 + }, + { + "loss": 0.0, + "grad_norm": 0.0008992516668513417, + "learning_rate": 5.355e-07, + "num_tokens": 634240.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.233699291944504e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4675, + "step": 935 + }, + { + "loss": 0.0, + "grad_norm": 0.9115592241287231, + "learning_rate": 5.35e-07, + "num_tokens": 635136.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 4.604365676641464e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.468, + "step": 936 + }, + { + "loss": 0.0, + "grad_norm": 0.0007278263801708817, + "learning_rate": 5.344999999999999e-07, + "num_tokens": 636032.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 3.4401193261146545e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4685, + "step": 937 + }, + { + "loss": 0.0, + "grad_norm": 0.0010212017223238945, + "learning_rate": 5.34e-07, + "num_tokens": 636928.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 4.621315747499466e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.469, + "step": 938 + }, + { + "loss": 0.0, + "grad_norm": 0.0007903206860646605, + "learning_rate": 5.335e-07, + "num_tokens": 637824.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8349999785423279, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8349999785423279, + "reward_std": 0.0, + "kl": 3.7049874663352966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4695, + "step": 939 + }, + { + "loss": 0.0, + "grad_norm": 0.0013730695936828852, + "learning_rate": 5.33e-07, + "num_tokens": 638190.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.6928955018520355e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.47, + "step": 940 + }, + { + "loss": 0.0, + "grad_norm": 0.7030513882637024, + "learning_rate": 5.325e-07, + "num_tokens": 639086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 2.5019049644470215e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4705, + "step": 941 + }, + { + "loss": -0.0, + "grad_norm": 0.9748480916023254, + "learning_rate": 5.32e-07, + "num_tokens": 639982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 4.683062434196472e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.471, + "step": 942 + }, + { + "loss": 0.0, + "grad_norm": 0.0008724030922167003, + "learning_rate": 5.314999999999999e-07, + "num_tokens": 640878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.5467947125434875e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4715, + "step": 943 + }, + { + "loss": 0.0, + "grad_norm": 0.0023628976196050644, + "learning_rate": 5.31e-07, + "num_tokens": 641244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.564450889825821e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.472, + "step": 944 + }, + { + "loss": 0.0, + "grad_norm": 0.7218869924545288, + "learning_rate": 5.304999999999999e-07, + "num_tokens": 642140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 1.4922581613063812e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4725, + "step": 945 + }, + { + "loss": 0.0, + "grad_norm": 0.0009410440688952804, + "learning_rate": 5.3e-07, + "num_tokens": 642506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.3725594878196716e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.473, + "step": 946 + }, + { + "loss": 0.0, + "grad_norm": 0.9045856595039368, + "learning_rate": 5.295e-07, + "num_tokens": 643402.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.01909189112484455, + "reward": 0.8365000486373901, + "reward_std": 0.01909189112484455, + "kl": 3.302842378616333e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4735, + "step": 947 + }, + { + "loss": 0.0, + "grad_norm": 0.0006632182630710304, + "learning_rate": 5.29e-07, + "num_tokens": 644298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 2.4668872356414795e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.474, + "step": 948 + }, + { + "loss": 0.0, + "grad_norm": 0.0006489086663350463, + "learning_rate": 5.284999999999999e-07, + "num_tokens": 644664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.09748575091362e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4745, + "step": 949 + }, + { + "loss": 0.0, + "grad_norm": 0.9527900815010071, + "learning_rate": 5.28e-07, + "num_tokens": 645560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 6.148312240839005e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.475, + "step": 950 + }, + { + "loss": 0.0, + "grad_norm": 0.9770010113716125, + "learning_rate": 5.274999999999999e-07, + "num_tokens": 646456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.6250799894332886e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4755, + "step": 951 + }, + { + "loss": 0.0, + "grad_norm": 0.0007939549977891147, + "learning_rate": 5.27e-07, + "num_tokens": 647352.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.37185338139534e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.476, + "step": 952 + }, + { + "loss": 0.0, + "grad_norm": 0.0007053684676066041, + "learning_rate": 5.265e-07, + "num_tokens": 647718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.0064024031162262e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4765, + "step": 953 + }, + { + "loss": 0.0, + "grad_norm": 0.06403394043445587, + "learning_rate": 5.26e-07, + "num_tokens": 648614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.001065908931195736, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.477, + "step": 954 + }, + { + "loss": 0.0, + "grad_norm": 0.7209022641181946, + "learning_rate": 5.255e-07, + "num_tokens": 649510.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.8149999976158142, + "reward_std": 0.011313731782138348, + "kl": 4.2875297367572784e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4775, + "step": 955 + }, + { + "loss": 0.0, + "grad_norm": 0.00426756776869297, + "learning_rate": 5.25e-07, + "num_tokens": 650406.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 0.00011035241186618805, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.478, + "step": 956 + }, + { + "loss": 0.0, + "grad_norm": 0.001966584473848343, + "learning_rate": 5.244999999999999e-07, + "num_tokens": 650772.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.261095404624939e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4785, + "step": 957 + }, + { + "loss": 0.0, + "grad_norm": 0.5687603950500488, + "learning_rate": 5.24e-07, + "num_tokens": 651668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.075692802667618e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.479, + "step": 958 + }, + { + "loss": 0.0, + "grad_norm": 0.0005653072148561478, + "learning_rate": 5.234999999999999e-07, + "num_tokens": 652034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.505071461200714e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4795, + "step": 959 + }, + { + "loss": 0.0, + "grad_norm": 0.004983440041542053, + "learning_rate": 5.23e-07, + "num_tokens": 652930.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 8.590333163738251e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.48, + "step": 960 + }, + { + "loss": 0.0, + "grad_norm": 0.0006832435610704124, + "learning_rate": 5.225e-07, + "num_tokens": 653826.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.18955460190773e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4805, + "step": 961 + }, + { + "loss": 0.0, + "grad_norm": 0.0007571274181827903, + "learning_rate": 5.22e-07, + "num_tokens": 654192.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.937018871307373e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.481, + "step": 962 + }, + { + "loss": 0.0, + "grad_norm": 0.0010364153422415257, + "learning_rate": 5.214999999999999e-07, + "num_tokens": 654558.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4516135454177856e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4815, + "step": 963 + }, + { + "loss": 0.0, + "grad_norm": 0.0011270501418039203, + "learning_rate": 5.21e-07, + "num_tokens": 654924.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.379132926464081e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.482, + "step": 964 + }, + { + "loss": 0.0, + "grad_norm": 1.1790162324905396, + "learning_rate": 5.204999999999999e-07, + "num_tokens": 655820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 4.971399903297424e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4825, + "step": 965 + }, + { + "loss": 0.0, + "grad_norm": 0.0014127911999821663, + "learning_rate": 5.2e-07, + "num_tokens": 656716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7829999923706055, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7829999923706055, + "reward_std": 0.0, + "kl": 5.042552947998047e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.483, + "step": 966 + }, + { + "loss": 0.0, + "grad_norm": 0.7780529856681824, + "learning_rate": 5.195e-07, + "num_tokens": 657612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8044999837875366, + "reward_std": 0.06434673070907593, + "kl": 6.663426756858826e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4835, + "step": 967 + }, + { + "loss": 0.0, + "grad_norm": 0.001735977828502655, + "learning_rate": 5.19e-07, + "num_tokens": 657978.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.362143903970718e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.484, + "step": 968 + }, + { + "loss": 0.0, + "grad_norm": 0.0010887464741244912, + "learning_rate": 5.184999999999999e-07, + "num_tokens": 658344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.819167613983154e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4845, + "step": 969 + }, + { + "loss": 0.0, + "grad_norm": 0.8512638807296753, + "learning_rate": 5.18e-07, + "num_tokens": 659240.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8050000071525574, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.8050000071525574, + "reward_std": 0.01272792648524046, + "kl": 3.4036580473184586e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.485, + "step": 970 + }, + { + "loss": 0.0, + "grad_norm": 0.001590660191141069, + "learning_rate": 5.174999999999999e-07, + "num_tokens": 659606.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.096236079931259e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4855, + "step": 971 + }, + { + "loss": 0.0, + "grad_norm": 0.003125761868432164, + "learning_rate": 5.17e-07, + "num_tokens": 659972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.1511841118335724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.486, + "step": 972 + }, + { + "loss": 0.0, + "grad_norm": 0.0008358623599633574, + "learning_rate": 5.164999999999999e-07, + "num_tokens": 660868.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 4.815123975276947e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4865, + "step": 973 + }, + { + "loss": 0.0, + "grad_norm": 0.0006493424880318344, + "learning_rate": 5.16e-07, + "num_tokens": 661764.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.00602987408638e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.487, + "step": 974 + }, + { + "loss": 0.0, + "grad_norm": 0.0005122573347762227, + "learning_rate": 5.155e-07, + "num_tokens": 662660.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 2.6183202862739563e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4875, + "step": 975 + }, + { + "loss": 0.0, + "grad_norm": 0.0013554071774706244, + "learning_rate": 5.149999999999999e-07, + "num_tokens": 663556.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 3.3993273973464966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.488, + "step": 976 + }, + { + "loss": 0.0, + "grad_norm": 0.001144697074778378, + "learning_rate": 5.144999999999999e-07, + "num_tokens": 663922.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.336463123559952e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4885, + "step": 977 + }, + { + "loss": 0.0, + "grad_norm": 0.0025168475694954395, + "learning_rate": 5.14e-07, + "num_tokens": 664818.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 6.39837235212326e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.489, + "step": 978 + }, + { + "loss": 0.0, + "grad_norm": 0.0009632411529310048, + "learning_rate": 5.134999999999999e-07, + "num_tokens": 665184.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3915042877197266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4895, + "step": 979 + }, + { + "loss": 0.0, + "grad_norm": 0.0008115009986795485, + "learning_rate": 5.13e-07, + "num_tokens": 665550.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.3784505426883698e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.49, + "step": 980 + }, + { + "loss": 0.0, + "grad_norm": 0.0017039045924320817, + "learning_rate": 5.125e-07, + "num_tokens": 665916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.642868250608444e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4905, + "step": 981 + }, + { + "loss": 0.0, + "grad_norm": 0.711256742477417, + "learning_rate": 5.12e-07, + "num_tokens": 666812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 4.299357533454895e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.491, + "step": 982 + }, + { + "loss": 0.0, + "grad_norm": 0.0006743049598298967, + "learning_rate": 5.114999999999999e-07, + "num_tokens": 667178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3412518203258514e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4915, + "step": 983 + }, + { + "loss": 0.0, + "grad_norm": 0.0012645031092688441, + "learning_rate": 5.11e-07, + "num_tokens": 667544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.6438148021698e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.492, + "step": 984 + }, + { + "loss": 0.0, + "grad_norm": 1.116913080215454, + "learning_rate": 5.104999999999999e-07, + "num_tokens": 668440.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 6.992463022470474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4925, + "step": 985 + }, + { + "loss": 0.0, + "grad_norm": 0.0014276455622166395, + "learning_rate": 5.1e-07, + "num_tokens": 668806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.637947469949722e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.493, + "step": 986 + }, + { + "loss": 0.0, + "grad_norm": 0.000873086741194129, + "learning_rate": 5.095e-07, + "num_tokens": 669172.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.7686899304389954e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4935, + "step": 987 + }, + { + "loss": 0.0, + "grad_norm": 0.574111819267273, + "learning_rate": 5.09e-07, + "num_tokens": 670068.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 3.855861723423004e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.494, + "step": 988 + }, + { + "loss": 0.0, + "grad_norm": 0.6999775171279907, + "learning_rate": 5.085e-07, + "num_tokens": 670964.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 2.8043054044246674e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4945, + "step": 989 + }, + { + "loss": 0.0, + "grad_norm": 0.0009233710006810725, + "learning_rate": 5.079999999999999e-07, + "num_tokens": 671330.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.8283877074718475e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.495, + "step": 990 + }, + { + "loss": 0.0, + "grad_norm": 0.24552400410175323, + "learning_rate": 5.074999999999999e-07, + "num_tokens": 672226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.004242670256644487, + "reward": 0.8149999976158142, + "reward_std": 0.004242670256644487, + "kl": 5.236826837062836e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4955, + "step": 991 + }, + { + "loss": 0.0, + "grad_norm": 0.8669341802597046, + "learning_rate": 5.07e-07, + "num_tokens": 673122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8264999985694885, + "rewards/environment_reward_verifier/std": 0.004949725698679686, + "reward": 0.8264999985694885, + "reward_std": 0.004949725698679686, + "kl": 5.610194057226181e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.496, + "step": 992 + }, + { + "loss": 0.0, + "grad_norm": 0.0009756143554113805, + "learning_rate": 5.064999999999999e-07, + "num_tokens": 673488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3435411751270294e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4965, + "step": 993 + }, + { + "loss": 0.0, + "grad_norm": 0.002642970299348235, + "learning_rate": 5.06e-07, + "num_tokens": 673854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.523100167512894e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.497, + "step": 994 + }, + { + "loss": 0.0, + "grad_norm": 0.0025872448459267616, + "learning_rate": 5.055e-07, + "num_tokens": 674220.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0001097600907087326, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4975, + "step": 995 + }, + { + "loss": -0.0, + "grad_norm": 0.7565536499023438, + "learning_rate": 5.049999999999999e-07, + "num_tokens": 675116.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 3.309641033411026e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.498, + "step": 996 + }, + { + "loss": 0.0, + "grad_norm": 0.0005875544156879187, + "learning_rate": 5.044999999999999e-07, + "num_tokens": 675482.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8343329429626465e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4985, + "step": 997 + }, + { + "loss": 0.0, + "grad_norm": 0.006418801844120026, + "learning_rate": 5.04e-07, + "num_tokens": 675848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.209205508232117e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.499, + "step": 998 + }, + { + "loss": 0.0, + "grad_norm": 0.0005877927760593593, + "learning_rate": 5.034999999999999e-07, + "num_tokens": 676744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8149999976158142, + "reward_std": 0.0, + "kl": 2.3884698748588562e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.4995, + "step": 999 + }, + { + "loss": 0.0, + "grad_norm": 0.0007023665821179748, + "learning_rate": 5.03e-07, + "num_tokens": 677640.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8560000061988831, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8560000061988831, + "reward_std": 0.0, + "kl": 3.754999488592148e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5, + "step": 1000 + }, + { + "loss": 0.0, + "grad_norm": 0.8347640633583069, + "learning_rate": 5.025e-07, + "num_tokens": 678536.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8144999742507935, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8144999742507935, + "reward_std": 0.0035355305299162865, + "kl": 4.554633051156998e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5005, + "step": 1001 + }, + { + "loss": 0.0, + "grad_norm": 1.0682181119918823, + "learning_rate": 5.02e-07, + "num_tokens": 679432.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8140000104904175, + "rewards/environment_reward_verifier/std": 0.002828432945534587, + "reward": 0.8140000104904175, + "reward_std": 0.002828432945534587, + "kl": 0.00010714586824178696, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.501, + "step": 1002 + }, + { + "loss": 0.0, + "grad_norm": 0.7141183018684387, + "learning_rate": 5.014999999999999e-07, + "num_tokens": 680328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.0689872801303864e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5015, + "step": 1003 + }, + { + "loss": 0.0, + "grad_norm": 0.0013398455921560526, + "learning_rate": 5.009999999999999e-07, + "num_tokens": 680694.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.019921809434891e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.502, + "step": 1004 + }, + { + "loss": 0.0, + "grad_norm": 0.0013964761747047305, + "learning_rate": 5.004999999999999e-07, + "num_tokens": 681060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.270688027143478e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5025, + "step": 1005 + }, + { + "loss": 0.0, + "grad_norm": 0.0015274528414011002, + "learning_rate": 5e-07, + "num_tokens": 681426.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.6170706152915955e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.503, + "step": 1006 + }, + { + "loss": 0.0, + "grad_norm": 0.0006098856101743877, + "learning_rate": 4.994999999999999e-07, + "num_tokens": 681792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.366025000810623e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5035, + "step": 1007 + }, + { + "loss": 0.0, + "grad_norm": 0.0028049976099282503, + "learning_rate": 4.99e-07, + "num_tokens": 682158.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.973301500082016e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.504, + "step": 1008 + }, + { + "loss": 0.0, + "grad_norm": 0.001014014589600265, + "learning_rate": 4.985e-07, + "num_tokens": 682524.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.2168813049793243e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5045, + "step": 1009 + }, + { + "loss": 0.0, + "grad_norm": 0.0006871579680591822, + "learning_rate": 4.979999999999999e-07, + "num_tokens": 683420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.037190228700638e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.505, + "step": 1010 + }, + { + "loss": 0.0, + "grad_norm": 2.6453120708465576, + "learning_rate": 4.975e-07, + "num_tokens": 684316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8240000009536743, + "rewards/environment_reward_verifier/std": 0.015556317754089832, + "reward": 0.8240000009536743, + "reward_std": 0.015556317754089832, + "kl": 0.0003169504925608635, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5055, + "step": 1011 + }, + { + "loss": 0.0, + "grad_norm": 0.7730938196182251, + "learning_rate": 4.97e-07, + "num_tokens": 685212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 4.6455301344394684e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.506, + "step": 1012 + }, + { + "loss": 0.0, + "grad_norm": 0.0013291386421769857, + "learning_rate": 4.964999999999999e-07, + "num_tokens": 686108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 6.316695362329483e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5065, + "step": 1013 + }, + { + "loss": 0.0, + "grad_norm": 0.0015565111534669995, + "learning_rate": 4.96e-07, + "num_tokens": 686474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.946533590555191e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.507, + "step": 1014 + }, + { + "loss": 0.0, + "grad_norm": 0.8053126335144043, + "learning_rate": 4.955e-07, + "num_tokens": 687370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 4.605855792760849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5075, + "step": 1015 + }, + { + "loss": 0.0, + "grad_norm": 0.0013168035075068474, + "learning_rate": 4.95e-07, + "num_tokens": 687736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.0020404160022736e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.508, + "step": 1016 + }, + { + "loss": 0.0, + "grad_norm": 0.6808350086212158, + "learning_rate": 4.945e-07, + "num_tokens": 688632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8105000257492065, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8105000257492065, + "reward_std": 0.06434673070907593, + "kl": 1.3706274330615997e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5085, + "step": 1017 + }, + { + "loss": 0.0, + "grad_norm": 0.0008983907173387706, + "learning_rate": 4.94e-07, + "num_tokens": 688998.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.1688640117645264e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.509, + "step": 1018 + }, + { + "loss": 0.0, + "grad_norm": 0.0004645304870791733, + "learning_rate": 4.935e-07, + "num_tokens": 689364.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.466553658246994e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5095, + "step": 1019 + }, + { + "loss": 0.0, + "grad_norm": 0.6623954176902771, + "learning_rate": 4.93e-07, + "num_tokens": 690260.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7879999876022339, + "rewards/environment_reward_verifier/std": 0.05091170594096184, + "reward": 0.7879999876022339, + "reward_std": 0.05091170594096184, + "kl": 5.1676295697689056e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.51, + "step": 1020 + }, + { + "loss": 0.0, + "grad_norm": 0.0022292693611234426, + "learning_rate": 4.924999999999999e-07, + "num_tokens": 691156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.382765084505081e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5105, + "step": 1021 + }, + { + "loss": 0.0, + "grad_norm": 0.0006294287159107625, + "learning_rate": 4.92e-07, + "num_tokens": 692052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 1.8159858882427216e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.511, + "step": 1022 + }, + { + "loss": 0.0, + "grad_norm": 0.001646587741561234, + "learning_rate": 4.915e-07, + "num_tokens": 692948.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3790000081062317, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3790000081062317, + "reward_std": 0.0, + "kl": 6.076321005821228e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5115, + "step": 1023 + }, + { + "loss": 0.0, + "grad_norm": 0.003970656078308821, + "learning_rate": 4.909999999999999e-07, + "num_tokens": 693314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.349051207304001e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.512, + "step": 1024 + }, + { + "loss": -0.0, + "grad_norm": 1.3712973594665527, + "learning_rate": 4.905e-07, + "num_tokens": 694210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8044999837875366, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.8044999837875366, + "reward_std": 0.012020829133689404, + "kl": 5.5252574384212494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5125, + "step": 1025 + }, + { + "loss": 0.0, + "grad_norm": 0.7226940989494324, + "learning_rate": 4.9e-07, + "num_tokens": 695106.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.037136048078537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.513, + "step": 1026 + }, + { + "loss": 0.0, + "grad_norm": 0.7758554816246033, + "learning_rate": 4.894999999999999e-07, + "num_tokens": 696002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 2.376362681388855e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5135, + "step": 1027 + }, + { + "loss": 0.0, + "grad_norm": 0.0011743708746507764, + "learning_rate": 4.89e-07, + "num_tokens": 696368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.008280277252197e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.514, + "step": 1028 + }, + { + "loss": 0.0, + "grad_norm": 0.0008045915747061372, + "learning_rate": 4.885e-07, + "num_tokens": 696734.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.055428922176361e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5145, + "step": 1029 + }, + { + "loss": 0.0, + "grad_norm": 0.0016251134220510721, + "learning_rate": 4.879999999999999e-07, + "num_tokens": 697100.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.6836212277412415e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.515, + "step": 1030 + }, + { + "loss": 0.0, + "grad_norm": 0.0009004175080917776, + "learning_rate": 4.875e-07, + "num_tokens": 697466.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1818635761737823e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5155, + "step": 1031 + }, + { + "loss": 0.0, + "grad_norm": 0.000870404823217541, + "learning_rate": 4.87e-07, + "num_tokens": 697832.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.290137439966202e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.516, + "step": 1032 + }, + { + "loss": 0.0, + "grad_norm": 0.0008007647120393813, + "learning_rate": 4.864999999999999e-07, + "num_tokens": 698198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.1054561734199524e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5165, + "step": 1033 + }, + { + "loss": 0.0, + "grad_norm": 0.0012625895906239748, + "learning_rate": 4.86e-07, + "num_tokens": 699094.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 5.473196506500244e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.517, + "step": 1034 + }, + { + "loss": 0.0, + "grad_norm": 0.8870932459831238, + "learning_rate": 4.854999999999999e-07, + "num_tokens": 699990.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 4.998687654733658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5175, + "step": 1035 + }, + { + "loss": 0.0, + "grad_norm": 5.1996870040893555, + "learning_rate": 4.85e-07, + "num_tokens": 700886.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 0.0008062655106186867, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.518, + "step": 1036 + }, + { + "loss": 0.0, + "grad_norm": 0.9224255084991455, + "learning_rate": 4.845e-07, + "num_tokens": 701782.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.08909548819065094, + "reward": 0.8149999976158142, + "reward_std": 0.08909548819065094, + "kl": 8.533895015716553e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5185, + "step": 1037 + }, + { + "loss": 0.0, + "grad_norm": 0.9159997701644897, + "learning_rate": 4.839999999999999e-07, + "num_tokens": 702678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 0.00010907184332609177, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.519, + "step": 1038 + }, + { + "loss": 0.0, + "grad_norm": 0.9420398473739624, + "learning_rate": 4.835e-07, + "num_tokens": 703574.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.331620246171951e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5195, + "step": 1039 + }, + { + "loss": 0.0, + "grad_norm": 0.0006412892253138125, + "learning_rate": 4.83e-07, + "num_tokens": 703940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.81589275598526e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.52, + "step": 1040 + }, + { + "loss": 0.0, + "grad_norm": 0.0011514879297465086, + "learning_rate": 4.824999999999999e-07, + "num_tokens": 704836.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.644785076379776e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5205, + "step": 1041 + }, + { + "loss": 0.0, + "grad_norm": 0.7989395260810852, + "learning_rate": 4.82e-07, + "num_tokens": 705732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8374999761581421, + "rewards/environment_reward_verifier/std": 0.026162952184677124, + "reward": 0.8374999761581421, + "reward_std": 0.026162952184677124, + "kl": 4.004035145044327e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.521, + "step": 1042 + }, + { + "loss": 0.0, + "grad_norm": 0.7823817133903503, + "learning_rate": 4.815e-07, + "num_tokens": 706628.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 5.509518086910248e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5215, + "step": 1043 + }, + { + "loss": 0.0, + "grad_norm": 0.0010213347850367427, + "learning_rate": 4.809999999999999e-07, + "num_tokens": 706994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.8906000554561615e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.522, + "step": 1044 + }, + { + "loss": 0.0, + "grad_norm": 0.000587350397836417, + "learning_rate": 4.805e-07, + "num_tokens": 707890.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.326536923646927e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5225, + "step": 1045 + }, + { + "loss": 0.0, + "grad_norm": 1.244295358657837, + "learning_rate": 4.8e-07, + "num_tokens": 708786.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 8.475873619318008e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.523, + "step": 1046 + }, + { + "loss": -0.0, + "grad_norm": 0.5794961452484131, + "learning_rate": 4.794999999999999e-07, + "num_tokens": 709682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 1.4612451195716858e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5235, + "step": 1047 + }, + { + "loss": 0.0, + "grad_norm": 0.0013103070668876171, + "learning_rate": 4.79e-07, + "num_tokens": 710578.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.042925477027893e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.524, + "step": 1048 + }, + { + "loss": 0.0, + "grad_norm": 0.0006897756247781217, + "learning_rate": 4.785e-07, + "num_tokens": 711474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.652740269899368e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5245, + "step": 1049 + }, + { + "loss": 0.0, + "grad_norm": 0.001127156661823392, + "learning_rate": 4.779999999999999e-07, + "num_tokens": 712370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8360000252723694, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8360000252723694, + "reward_std": 0.0, + "kl": 4.3822452425956726e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.525, + "step": 1050 + }, + { + "loss": 0.0, + "grad_norm": 0.9209012985229492, + "learning_rate": 4.775e-07, + "num_tokens": 713266.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 8.319783955812454e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5255, + "step": 1051 + }, + { + "loss": 0.0, + "grad_norm": 0.0004929061979055405, + "learning_rate": 4.769999999999999e-07, + "num_tokens": 713632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4474615454673767e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.526, + "step": 1052 + }, + { + "loss": 0.0, + "grad_norm": 0.0008575913379900157, + "learning_rate": 4.7649999999999996e-07, + "num_tokens": 714528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 4.644319415092468e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5265, + "step": 1053 + }, + { + "loss": 0.0, + "grad_norm": 0.0010711499489843845, + "learning_rate": 4.76e-07, + "num_tokens": 714894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.60710546374321e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.527, + "step": 1054 + }, + { + "loss": -0.0, + "grad_norm": 1.4542863368988037, + "learning_rate": 4.7549999999999994e-07, + "num_tokens": 715790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8244999647140503, + "rewards/environment_reward_verifier/std": 0.010606633499264717, + "reward": 0.8244999647140503, + "reward_std": 0.010606633499264717, + "kl": 4.874635487794876e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5275, + "step": 1055 + }, + { + "loss": 0.0, + "grad_norm": 0.0011175618274137378, + "learning_rate": 4.7499999999999995e-07, + "num_tokens": 716156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.2504630982875824e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.528, + "step": 1056 + }, + { + "loss": 0.0, + "grad_norm": 0.0014327390817925334, + "learning_rate": 4.7449999999999997e-07, + "num_tokens": 717052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.353878855705261e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5285, + "step": 1057 + }, + { + "loss": 0.0, + "grad_norm": 0.0010367042850703, + "learning_rate": 4.7399999999999993e-07, + "num_tokens": 717948.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.3087249398231506e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.529, + "step": 1058 + }, + { + "loss": 0.0, + "grad_norm": 0.0014642463065683842, + "learning_rate": 4.7349999999999995e-07, + "num_tokens": 718314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.121126115322113e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5295, + "step": 1059 + }, + { + "loss": 0.0, + "grad_norm": 0.001211618771776557, + "learning_rate": 4.7299999999999996e-07, + "num_tokens": 718680.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.409929245710373e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.53, + "step": 1060 + }, + { + "loss": 0.0, + "grad_norm": 0.43314775824546814, + "learning_rate": 4.725e-07, + "num_tokens": 719576.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.388283610343933e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5305, + "step": 1061 + }, + { + "loss": 0.0, + "grad_norm": 0.0021799022797495127, + "learning_rate": 4.7199999999999994e-07, + "num_tokens": 719942.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.931647658348083e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.531, + "step": 1062 + }, + { + "loss": 0.0, + "grad_norm": 0.9506287574768066, + "learning_rate": 4.7149999999999995e-07, + "num_tokens": 720838.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 6.758980453014374e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5315, + "step": 1063 + }, + { + "loss": 0.0, + "grad_norm": 0.0009273124160245061, + "learning_rate": 4.7099999999999997e-07, + "num_tokens": 721204.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.505537122488022e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.532, + "step": 1064 + }, + { + "loss": 0.0, + "grad_norm": 0.854387640953064, + "learning_rate": 4.7049999999999993e-07, + "num_tokens": 722100.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 5.616340786218643e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5325, + "step": 1065 + }, + { + "loss": 0.0, + "grad_norm": 0.0008773694280534983, + "learning_rate": 4.6999999999999995e-07, + "num_tokens": 722466.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8112903237342834e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.533, + "step": 1066 + }, + { + "loss": 0.0, + "grad_norm": 0.003864539787173271, + "learning_rate": 4.6949999999999996e-07, + "num_tokens": 722832.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.4163858294487e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5335, + "step": 1067 + }, + { + "loss": 0.0, + "grad_norm": 0.0008390177972614765, + "learning_rate": 4.689999999999999e-07, + "num_tokens": 723198.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3550895750522614e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.534, + "step": 1068 + }, + { + "loss": 0.0, + "grad_norm": 0.5819850564002991, + "learning_rate": 4.685e-07, + "num_tokens": 724094.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8215000033378601, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8215000033378601, + "reward_std": 0.030405579134821892, + "kl": 4.4189393520355225e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5345, + "step": 1069 + }, + { + "loss": 0.0, + "grad_norm": 0.7151784896850586, + "learning_rate": 4.68e-07, + "num_tokens": 724990.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 4.878733307123184e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.535, + "step": 1070 + }, + { + "loss": 0.0, + "grad_norm": 0.7200919985771179, + "learning_rate": 4.675e-07, + "num_tokens": 725886.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.03111271932721138, + "reward": 0.828000009059906, + "reward_std": 0.03111271932721138, + "kl": 2.308003604412079e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5355, + "step": 1071 + }, + { + "loss": 0.0, + "grad_norm": 0.0007754597463645041, + "learning_rate": 4.67e-07, + "num_tokens": 726782.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.343393862247467e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.536, + "step": 1072 + }, + { + "loss": 0.0, + "grad_norm": 1.467349886894226, + "learning_rate": 4.665e-07, + "num_tokens": 727678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 5.130656063556671e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5365, + "step": 1073 + }, + { + "loss": 0.0, + "grad_norm": 0.0014985098969191313, + "learning_rate": 4.66e-07, + "num_tokens": 728574.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 6.37909397482872e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.537, + "step": 1074 + }, + { + "loss": 0.0, + "grad_norm": 0.0006575265433639288, + "learning_rate": 4.655e-07, + "num_tokens": 728940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5262124836444855e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5375, + "step": 1075 + }, + { + "loss": 0.0, + "grad_norm": 0.0013476404128596187, + "learning_rate": 4.65e-07, + "num_tokens": 729836.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8360000252723694, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8360000252723694, + "reward_std": 0.0, + "kl": 6.878655403852463e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.538, + "step": 1076 + }, + { + "loss": 0.0, + "grad_norm": 0.8713648915290833, + "learning_rate": 4.645e-07, + "num_tokens": 730732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8285000324249268, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8285000324249268, + "reward_std": 0.030405621975660324, + "kl": 5.4436735808849335e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5385, + "step": 1077 + }, + { + "loss": 0.0, + "grad_norm": 0.896131694316864, + "learning_rate": 4.64e-07, + "num_tokens": 731628.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.01909189112484455, + "reward": 0.8365000486373901, + "reward_std": 0.01909189112484455, + "kl": 7.974077016115189e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.539, + "step": 1078 + }, + { + "loss": 0.0, + "grad_norm": 0.0010619338136166334, + "learning_rate": 4.635e-07, + "num_tokens": 731994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.778841346502304e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5395, + "step": 1079 + }, + { + "loss": 0.0, + "grad_norm": 0.0038044482935220003, + "learning_rate": 4.63e-07, + "num_tokens": 732890.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 6.113387644290924e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.54, + "step": 1080 + }, + { + "loss": 0.0, + "grad_norm": 0.0006946232169866562, + "learning_rate": 4.625e-07, + "num_tokens": 733256.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9797665774822235e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5405, + "step": 1081 + }, + { + "loss": 0.0, + "grad_norm": 0.0010349710937589407, + "learning_rate": 4.62e-07, + "num_tokens": 733622.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.0976330637931824e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.541, + "step": 1082 + }, + { + "loss": -0.0, + "grad_norm": 0.8080283999443054, + "learning_rate": 4.615e-07, + "num_tokens": 734518.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 3.455299884080887e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5415, + "step": 1083 + }, + { + "loss": 0.0, + "grad_norm": 0.6965125799179077, + "learning_rate": 4.61e-07, + "num_tokens": 735414.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 1.866370439529419e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.542, + "step": 1084 + }, + { + "loss": 0.0, + "grad_norm": 0.6720305681228638, + "learning_rate": 4.605e-07, + "num_tokens": 736310.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6024999618530273, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6024999618530273, + "reward_std": 0.32031938433647156, + "kl": 4.154164344072342e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5425, + "step": 1085 + }, + { + "loss": 0.0, + "grad_norm": 0.0013083838857710361, + "learning_rate": 4.6e-07, + "num_tokens": 736676.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.0749629735946655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.543, + "step": 1086 + }, + { + "loss": 0.0, + "grad_norm": 0.009301274083554745, + "learning_rate": 4.595e-07, + "num_tokens": 737042.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.457805961370468e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5435, + "step": 1087 + }, + { + "loss": 0.0, + "grad_norm": 0.0004053961019963026, + "learning_rate": 4.59e-07, + "num_tokens": 737408.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.5139579772949219e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.544, + "step": 1088 + }, + { + "loss": 0.0, + "grad_norm": 0.0011373644229024649, + "learning_rate": 4.585e-07, + "num_tokens": 737774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.684296876192093e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5445, + "step": 1089 + }, + { + "loss": 0.0, + "grad_norm": 0.0016718122642487288, + "learning_rate": 4.58e-07, + "num_tokens": 738140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.372838884592056e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.545, + "step": 1090 + }, + { + "loss": 0.0, + "grad_norm": 0.0015452688094228506, + "learning_rate": 4.575e-07, + "num_tokens": 738506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.757917046546936e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5455, + "step": 1091 + }, + { + "loss": 0.0, + "grad_norm": 0.0012514872942119837, + "learning_rate": 4.57e-07, + "num_tokens": 738872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.210827708244324e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.546, + "step": 1092 + }, + { + "loss": 0.0, + "grad_norm": 0.005028535611927509, + "learning_rate": 4.565e-07, + "num_tokens": 739768.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 8.534826338291168e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5465, + "step": 1093 + }, + { + "loss": 0.0, + "grad_norm": 0.8036929368972778, + "learning_rate": 4.56e-07, + "num_tokens": 740664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6130000352859497, + "rewards/environment_reward_verifier/std": 0.33516865968704224, + "reward": 0.6130000352859497, + "reward_std": 0.33516862988471985, + "kl": 2.9150396585464478e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.547, + "step": 1094 + }, + { + "loss": 0.0, + "grad_norm": 0.0015902062878012657, + "learning_rate": 4.5549999999999997e-07, + "num_tokens": 741030.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.9276819229125977e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5475, + "step": 1095 + }, + { + "loss": 0.0, + "grad_norm": 0.006445720326155424, + "learning_rate": 4.55e-07, + "num_tokens": 741926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 0.00020186323672533035, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.548, + "step": 1096 + }, + { + "loss": 0.0, + "grad_norm": 0.0024542820174247026, + "learning_rate": 4.545e-07, + "num_tokens": 742292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.358752191066742e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5485, + "step": 1097 + }, + { + "loss": 0.0, + "grad_norm": 0.7798157930374146, + "learning_rate": 4.54e-07, + "num_tokens": 743188.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.195274621248245e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.549, + "step": 1098 + }, + { + "loss": 0.0, + "grad_norm": 0.002626468427479267, + "learning_rate": 4.535e-07, + "num_tokens": 743554.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.415508687496185e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5495, + "step": 1099 + }, + { + "loss": 0.0, + "grad_norm": 0.0010975905461236835, + "learning_rate": 4.53e-07, + "num_tokens": 744450.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.399195313453674e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.55, + "step": 1100 + }, + { + "loss": 0.0, + "grad_norm": 0.0014132909709587693, + "learning_rate": 4.525e-07, + "num_tokens": 744816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.489106893539429e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5505, + "step": 1101 + }, + { + "loss": 0.0, + "grad_norm": 0.0008872256148606539, + "learning_rate": 4.5199999999999997e-07, + "num_tokens": 745182.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7196481823921204e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.551, + "step": 1102 + }, + { + "loss": 0.0, + "grad_norm": 0.0009551795083098114, + "learning_rate": 4.515e-07, + "num_tokens": 745548.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.835450530052185e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5515, + "step": 1103 + }, + { + "loss": 0.0, + "grad_norm": 0.0009749606251716614, + "learning_rate": 4.51e-07, + "num_tokens": 745914.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.7489069402217865e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.552, + "step": 1104 + }, + { + "loss": 0.0, + "grad_norm": 0.701126217842102, + "learning_rate": 4.505e-07, + "num_tokens": 746810.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 3.5354867577552795e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5525, + "step": 1105 + }, + { + "loss": 0.0, + "grad_norm": 0.0016017908928915858, + "learning_rate": 4.5e-07, + "num_tokens": 747176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.077982157468796e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.553, + "step": 1106 + }, + { + "loss": 0.0, + "grad_norm": 0.02981463633477688, + "learning_rate": 4.495e-07, + "num_tokens": 748072.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.0003043217584490776, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5535, + "step": 1107 + }, + { + "loss": 0.0, + "grad_norm": 0.7885046005249023, + "learning_rate": 4.49e-07, + "num_tokens": 748968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 4.943087697029114e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.554, + "step": 1108 + }, + { + "loss": 0.0, + "grad_norm": 0.0013270628405734897, + "learning_rate": 4.4849999999999997e-07, + "num_tokens": 749864.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 3.764824941754341e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5545, + "step": 1109 + }, + { + "loss": 0.0, + "grad_norm": 0.002615105826407671, + "learning_rate": 4.48e-07, + "num_tokens": 750760.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 5.5215321481227875e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.555, + "step": 1110 + }, + { + "loss": 0.0, + "grad_norm": 0.004951399751007557, + "learning_rate": 4.475e-07, + "num_tokens": 751656.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8349999785423279, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8349999785423279, + "reward_std": 0.0, + "kl": 8.068140596151352e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5555, + "step": 1111 + }, + { + "loss": 0.0, + "grad_norm": 0.0012534718262031674, + "learning_rate": 4.4699999999999997e-07, + "num_tokens": 752552.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.725903272628784e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.556, + "step": 1112 + }, + { + "loss": 0.0, + "grad_norm": 1.019243597984314, + "learning_rate": 4.465e-07, + "num_tokens": 753448.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 2.8742477297782898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5565, + "step": 1113 + }, + { + "loss": 0.0, + "grad_norm": 0.0007149396697059274, + "learning_rate": 4.46e-07, + "num_tokens": 754344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 3.425125032663345e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.557, + "step": 1114 + }, + { + "loss": 0.0, + "grad_norm": 0.7942933440208435, + "learning_rate": 4.455e-07, + "num_tokens": 755240.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.513360232114792e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5575, + "step": 1115 + }, + { + "loss": 0.0, + "grad_norm": 0.0008115972159430385, + "learning_rate": 4.45e-07, + "num_tokens": 755606.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9197894036769867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.558, + "step": 1116 + }, + { + "loss": 0.0, + "grad_norm": 0.0004850304394494742, + "learning_rate": 4.445e-07, + "num_tokens": 756502.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 2.2466294467449188e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5585, + "step": 1117 + }, + { + "loss": 0.0, + "grad_norm": 0.0030674112495034933, + "learning_rate": 4.44e-07, + "num_tokens": 757398.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 7.501151412725449e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.559, + "step": 1118 + }, + { + "loss": 0.0, + "grad_norm": 7.088427543640137, + "learning_rate": 4.4349999999999997e-07, + "num_tokens": 758294.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.0011300211772322655, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5595, + "step": 1119 + }, + { + "loss": 0.0, + "grad_norm": 0.4334491193294525, + "learning_rate": 4.43e-07, + "num_tokens": 759190.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 7.447786629199982e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.56, + "step": 1120 + }, + { + "loss": 0.0, + "grad_norm": 0.0007208894239738584, + "learning_rate": 4.425e-07, + "num_tokens": 760086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8799999952316284, + "reward_std": 0.0, + "kl": 3.8051046431064606e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5605, + "step": 1121 + }, + { + "loss": 0.0, + "grad_norm": 0.0007795984856784344, + "learning_rate": 4.4199999999999996e-07, + "num_tokens": 760982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.468656748533249e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.561, + "step": 1122 + }, + { + "loss": 0.0, + "grad_norm": 0.0012512864777818322, + "learning_rate": 4.415e-07, + "num_tokens": 761878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.391837865114212e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5615, + "step": 1123 + }, + { + "loss": 0.0, + "grad_norm": 0.0009035151451826096, + "learning_rate": 4.41e-07, + "num_tokens": 762244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.166031092405319e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.562, + "step": 1124 + }, + { + "loss": 0.0, + "grad_norm": 0.005260740406811237, + "learning_rate": 4.405e-07, + "num_tokens": 762610.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.784312427043915e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5625, + "step": 1125 + }, + { + "loss": 0.0, + "grad_norm": 0.005609462503343821, + "learning_rate": 4.3999999999999997e-07, + "num_tokens": 762976.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00010124035179615021, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.563, + "step": 1126 + }, + { + "loss": 0.0, + "grad_norm": 1.2771704196929932, + "learning_rate": 4.395e-07, + "num_tokens": 763872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8355000019073486, + "reward_std": 0.030405579134821892, + "kl": 4.788767546415329e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5635, + "step": 1127 + }, + { + "loss": 0.0, + "grad_norm": 0.0021501986775547266, + "learning_rate": 4.39e-07, + "num_tokens": 764768.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.868744432926178e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.564, + "step": 1128 + }, + { + "loss": 0.0, + "grad_norm": 0.02380327321588993, + "learning_rate": 4.3849999999999996e-07, + "num_tokens": 765664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 0.00020685698837041855, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5645, + "step": 1129 + }, + { + "loss": 0.0, + "grad_norm": 0.0008271721890196204, + "learning_rate": 4.38e-07, + "num_tokens": 766560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.460142761468887e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.565, + "step": 1130 + }, + { + "loss": 0.0, + "grad_norm": 0.002502850955352187, + "learning_rate": 4.375e-07, + "num_tokens": 767456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 8.812826126813889e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5655, + "step": 1131 + }, + { + "loss": 0.0, + "grad_norm": 0.8675118684768677, + "learning_rate": 4.3699999999999996e-07, + "num_tokens": 768352.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 2.4055130779743195e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.566, + "step": 1132 + }, + { + "loss": 0.0, + "grad_norm": 0.0005724570946767926, + "learning_rate": 4.3649999999999997e-07, + "num_tokens": 768718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5970861315727234e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5665, + "step": 1133 + }, + { + "loss": 0.0, + "grad_norm": 0.9044247269630432, + "learning_rate": 4.36e-07, + "num_tokens": 769614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.267459735274315e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.567, + "step": 1134 + }, + { + "loss": 0.0, + "grad_norm": 0.0008706374792382121, + "learning_rate": 4.355e-07, + "num_tokens": 769980.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.38628888130188e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5675, + "step": 1135 + }, + { + "loss": 0.0, + "grad_norm": 0.0008669144008308649, + "learning_rate": 4.3499999999999996e-07, + "num_tokens": 770346.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9822811484336853e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.568, + "step": 1136 + }, + { + "loss": 0.0, + "grad_norm": 0.0008733807480894029, + "learning_rate": 4.345e-07, + "num_tokens": 771242.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.1771138310432434e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5685, + "step": 1137 + }, + { + "loss": 0.0, + "grad_norm": 0.6992013454437256, + "learning_rate": 4.34e-07, + "num_tokens": 772138.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.146566450595856e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.569, + "step": 1138 + }, + { + "loss": 0.0, + "grad_norm": 0.721673309803009, + "learning_rate": 4.3349999999999996e-07, + "num_tokens": 773034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.3486634492874146e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5695, + "step": 1139 + }, + { + "loss": 0.0, + "grad_norm": 0.0015109943924471736, + "learning_rate": 4.3299999999999997e-07, + "num_tokens": 773400.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.1791779696941376e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.57, + "step": 1140 + }, + { + "loss": 0.0, + "grad_norm": 0.0006302982219494879, + "learning_rate": 4.325e-07, + "num_tokens": 773766.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.970709025859833e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5705, + "step": 1141 + }, + { + "loss": 0.0, + "grad_norm": 0.8986210823059082, + "learning_rate": 4.3199999999999995e-07, + "num_tokens": 774662.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 2.2946856915950775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.571, + "step": 1142 + }, + { + "loss": 0.0, + "grad_norm": 0.9135581851005554, + "learning_rate": 4.3149999999999997e-07, + "num_tokens": 775558.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8339999914169312, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8339999914169312, + "reward_std": 0.0014141954015940428, + "kl": 4.8568472266197205e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5715, + "step": 1143 + }, + { + "loss": 0.0, + "grad_norm": 0.0007872915011830628, + "learning_rate": 4.31e-07, + "num_tokens": 776454.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.8450042009353638e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.572, + "step": 1144 + }, + { + "loss": 0.0, + "grad_norm": 0.0014165544416755438, + "learning_rate": 4.305e-07, + "num_tokens": 776820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.639888018369675e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5725, + "step": 1145 + }, + { + "loss": 0.0, + "grad_norm": 1.1294194459915161, + "learning_rate": 4.2999999999999996e-07, + "num_tokens": 777716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5985000133514404, + "rewards/environment_reward_verifier/std": 0.30900564789772034, + "reward": 0.5985000133514404, + "reward_std": 0.30900564789772034, + "kl": 3.513321280479431e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.573, + "step": 1146 + }, + { + "loss": 0.0, + "grad_norm": 1.3191306591033936, + "learning_rate": 4.295e-07, + "num_tokens": 778612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 6.908457726240158e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5735, + "step": 1147 + }, + { + "loss": 0.0, + "grad_norm": 0.0009586151572875679, + "learning_rate": 4.29e-07, + "num_tokens": 778978.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.177447408437729e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.574, + "step": 1148 + }, + { + "loss": 0.0, + "grad_norm": 0.0005024131387472153, + "learning_rate": 4.2849999999999995e-07, + "num_tokens": 779344.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.4783814549446106e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5745, + "step": 1149 + }, + { + "loss": 0.0, + "grad_norm": 0.0006900393636897206, + "learning_rate": 4.2799999999999997e-07, + "num_tokens": 779710.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.0194798707962036e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.575, + "step": 1150 + }, + { + "loss": 0.0, + "grad_norm": 0.0008045569411478937, + "learning_rate": 4.275e-07, + "num_tokens": 780076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.0642375349998474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5755, + "step": 1151 + }, + { + "loss": 0.0, + "grad_norm": 0.9339599609375, + "learning_rate": 4.2699999999999995e-07, + "num_tokens": 780972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 4.819221794605255e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.576, + "step": 1152 + }, + { + "loss": 0.0, + "grad_norm": 0.0030637807212769985, + "learning_rate": 4.2649999999999996e-07, + "num_tokens": 781338.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.25936484336853e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5765, + "step": 1153 + }, + { + "loss": 0.0, + "grad_norm": 0.0007876747404225171, + "learning_rate": 4.26e-07, + "num_tokens": 781704.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2448599338531494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.577, + "step": 1154 + }, + { + "loss": 0.0, + "grad_norm": 4.5117621421813965, + "learning_rate": 4.255e-07, + "num_tokens": 782600.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.00021765939891338348, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5775, + "step": 1155 + }, + { + "loss": 0.0, + "grad_norm": 0.7867717146873474, + "learning_rate": 4.2499999999999995e-07, + "num_tokens": 783496.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 4.140380769968033e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.578, + "step": 1156 + }, + { + "loss": 0.0, + "grad_norm": 1.147055983543396, + "learning_rate": 4.2449999999999997e-07, + "num_tokens": 784392.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 5.766935646533966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5785, + "step": 1157 + }, + { + "loss": 0.0, + "grad_norm": 0.0009962597396224737, + "learning_rate": 4.24e-07, + "num_tokens": 784758.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.4585120677948e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.579, + "step": 1158 + }, + { + "loss": 0.0, + "grad_norm": 0.6066794395446777, + "learning_rate": 4.2349999999999995e-07, + "num_tokens": 785654.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.099946141242981e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5795, + "step": 1159 + }, + { + "loss": 0.0, + "grad_norm": 0.0011076327646151185, + "learning_rate": 4.2299999999999996e-07, + "num_tokens": 786550.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 3.2811425626277924e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.58, + "step": 1160 + }, + { + "loss": 0.0, + "grad_norm": 0.0014531526248902082, + "learning_rate": 4.225e-07, + "num_tokens": 786916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.2596137821674347e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5805, + "step": 1161 + }, + { + "loss": 0.0, + "grad_norm": 0.9099974036216736, + "learning_rate": 4.2199999999999994e-07, + "num_tokens": 787812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 4.342012107372284e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.581, + "step": 1162 + }, + { + "loss": 0.0, + "grad_norm": 0.0007894930895417929, + "learning_rate": 4.2149999999999996e-07, + "num_tokens": 788178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.397651016712189e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5815, + "step": 1163 + }, + { + "loss": 0.0, + "grad_norm": 0.0006528134108521044, + "learning_rate": 4.2099999999999997e-07, + "num_tokens": 788544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8007663786411285e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.582, + "step": 1164 + }, + { + "loss": 0.0, + "grad_norm": 0.0013370973756536841, + "learning_rate": 4.205e-07, + "num_tokens": 789440.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 4.331488162279129e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5825, + "step": 1165 + }, + { + "loss": 0.0, + "grad_norm": 0.008622455410659313, + "learning_rate": 4.1999999999999995e-07, + "num_tokens": 789806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 9.85804945230484e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.583, + "step": 1166 + }, + { + "loss": 0.0, + "grad_norm": 0.0003398398694116622, + "learning_rate": 4.1949999999999996e-07, + "num_tokens": 790702.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 1.4378689229488373e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5835, + "step": 1167 + }, + { + "loss": 0.0, + "grad_norm": 0.0026922523975372314, + "learning_rate": 4.19e-07, + "num_tokens": 791598.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 5.420856177806854e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.584, + "step": 1168 + }, + { + "loss": 0.0, + "grad_norm": 0.0011085510486736894, + "learning_rate": 4.1849999999999994e-07, + "num_tokens": 791964.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.356672823429108e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5845, + "step": 1169 + }, + { + "loss": 0.0, + "grad_norm": 0.0014948807656764984, + "learning_rate": 4.1799999999999996e-07, + "num_tokens": 792860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.747083246707916e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.585, + "step": 1170 + }, + { + "loss": 0.0, + "grad_norm": 0.0024414442013949156, + "learning_rate": 4.1749999999999997e-07, + "num_tokens": 793226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.383230745792389e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5855, + "step": 1171 + }, + { + "loss": 0.0, + "grad_norm": 0.0008324653026647866, + "learning_rate": 4.17e-07, + "num_tokens": 793592.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4080276489257812e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.586, + "step": 1172 + }, + { + "loss": 0.0, + "grad_norm": 0.004513743333518505, + "learning_rate": 4.1649999999999995e-07, + "num_tokens": 793958.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.0094368159770966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5865, + "step": 1173 + }, + { + "loss": 0.0, + "grad_norm": 1.1424351930618286, + "learning_rate": 4.1599999999999997e-07, + "num_tokens": 794854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 4.7483015805482864e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.587, + "step": 1174 + }, + { + "loss": 0.0, + "grad_norm": 0.0007836687145754695, + "learning_rate": 4.155e-07, + "num_tokens": 795220.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.364775329828262e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5875, + "step": 1175 + }, + { + "loss": 0.0, + "grad_norm": 0.0010889176046475768, + "learning_rate": 4.1499999999999994e-07, + "num_tokens": 796116.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.194280624389648e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.588, + "step": 1176 + }, + { + "loss": 0.0, + "grad_norm": 0.0007088605780154467, + "learning_rate": 4.1449999999999996e-07, + "num_tokens": 796482.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.4199096262454987e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5885, + "step": 1177 + }, + { + "loss": 0.0, + "grad_norm": 1.070939540863037, + "learning_rate": 4.14e-07, + "num_tokens": 797378.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 0.0002916678786277771, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.589, + "step": 1178 + }, + { + "loss": 0.0, + "grad_norm": 0.6214652061462402, + "learning_rate": 4.1349999999999994e-07, + "num_tokens": 798274.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 2.4322420358657837e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5895, + "step": 1179 + }, + { + "loss": 0.0, + "grad_norm": 0.0009458345011807978, + "learning_rate": 4.1299999999999995e-07, + "num_tokens": 799170.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9888545870780945e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.59, + "step": 1180 + }, + { + "loss": 0.0, + "grad_norm": 0.0023420630022883415, + "learning_rate": 4.1249999999999997e-07, + "num_tokens": 800066.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8169999718666077, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8169999718666077, + "reward_std": 0.0, + "kl": 5.9927813708782196e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5905, + "step": 1181 + }, + { + "loss": 0.0, + "grad_norm": 0.000965822022408247, + "learning_rate": 4.12e-07, + "num_tokens": 800432.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.7750229239463806e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.591, + "step": 1182 + }, + { + "loss": 0.0, + "grad_norm": 1.6063085794448853, + "learning_rate": 4.1149999999999995e-07, + "num_tokens": 801328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 0.00027918070554733276, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5915, + "step": 1183 + }, + { + "loss": 0.0, + "grad_norm": 0.0005139731802046299, + "learning_rate": 4.1099999999999996e-07, + "num_tokens": 801694.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3162923753261566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.592, + "step": 1184 + }, + { + "loss": 0.0, + "grad_norm": 0.5656786561012268, + "learning_rate": 4.105e-07, + "num_tokens": 802590.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.2364390790462494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5925, + "step": 1185 + }, + { + "loss": 0.0, + "grad_norm": 0.0014976236270740628, + "learning_rate": 4.0999999999999994e-07, + "num_tokens": 803486.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 6.177928298711777e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.593, + "step": 1186 + }, + { + "loss": 0.0, + "grad_norm": 0.0004364319611340761, + "learning_rate": 4.0949999999999995e-07, + "num_tokens": 804382.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.5425106287002563e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5935, + "step": 1187 + }, + { + "loss": 0.0, + "grad_norm": 0.0009826120221987367, + "learning_rate": 4.0899999999999997e-07, + "num_tokens": 805278.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.7304667532444e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.594, + "step": 1188 + }, + { + "loss": 0.0, + "grad_norm": 0.64700847864151, + "learning_rate": 4.0849999999999993e-07, + "num_tokens": 806174.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 5.3250230848789215e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5945, + "step": 1189 + }, + { + "loss": 0.0, + "grad_norm": 0.0022661720868200064, + "learning_rate": 4.0799999999999995e-07, + "num_tokens": 806540.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.443595677614212e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.595, + "step": 1190 + }, + { + "loss": 0.0, + "grad_norm": 0.000834315549582243, + "learning_rate": 4.0749999999999996e-07, + "num_tokens": 806906.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1482428312301636e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5955, + "step": 1191 + }, + { + "loss": 0.0, + "grad_norm": 0.6438500285148621, + "learning_rate": 4.07e-07, + "num_tokens": 807802.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.063065767288208e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.596, + "step": 1192 + }, + { + "loss": 0.0, + "grad_norm": 1.1600512266159058, + "learning_rate": 4.0649999999999994e-07, + "num_tokens": 808698.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 7.457006722688675e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5965, + "step": 1193 + }, + { + "loss": 0.0, + "grad_norm": 0.5434377789497375, + "learning_rate": 4.06e-07, + "num_tokens": 809594.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 0.00014703162014484406, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.597, + "step": 1194 + }, + { + "loss": 0.0, + "grad_norm": 1.4017819166183472, + "learning_rate": 4.055e-07, + "num_tokens": 810490.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 8.405186235904694e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5975, + "step": 1195 + }, + { + "loss": 0.0, + "grad_norm": 0.0012142626801505685, + "learning_rate": 4.05e-07, + "num_tokens": 811386.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.384985029697418e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.598, + "step": 1196 + }, + { + "loss": 0.0, + "grad_norm": 1.018900752067566, + "learning_rate": 4.045e-07, + "num_tokens": 812282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 4.876777529716492e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5985, + "step": 1197 + }, + { + "loss": 0.0, + "grad_norm": 0.005210700444877148, + "learning_rate": 4.04e-07, + "num_tokens": 813178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 3.0909664928913116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.599, + "step": 1198 + }, + { + "loss": 0.0, + "grad_norm": 0.0011610703077167273, + "learning_rate": 4.0350000000000003e-07, + "num_tokens": 814074.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 5.2697956562042236e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.5995, + "step": 1199 + }, + { + "loss": 0.0, + "grad_norm": 0.0020010985899716616, + "learning_rate": 4.03e-07, + "num_tokens": 814440.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.6801211535930634e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6, + "step": 1200 + }, + { + "loss": -0.0, + "grad_norm": 1.154164433479309, + "learning_rate": 4.025e-07, + "num_tokens": 815336.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8194999694824219, + "rewards/environment_reward_verifier/std": 0.006363963708281517, + "reward": 0.8194999694824219, + "reward_std": 0.00636396324262023, + "kl": 5.737924948334694e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6005, + "step": 1201 + }, + { + "loss": 0.0, + "grad_norm": 0.8344117999076843, + "learning_rate": 4.02e-07, + "num_tokens": 816232.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 4.787277430295944e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.601, + "step": 1202 + }, + { + "loss": 0.0, + "grad_norm": 0.003480904968455434, + "learning_rate": 4.015e-07, + "num_tokens": 816598.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.830529749393463e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6015, + "step": 1203 + }, + { + "loss": 0.0, + "grad_norm": 0.5837674736976624, + "learning_rate": 4.01e-07, + "num_tokens": 817494.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7994999885559082, + "reward_std": 0.04879037290811539, + "kl": 3.146659582853317e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.602, + "step": 1204 + }, + { + "loss": 0.0, + "grad_norm": 0.0009633260779082775, + "learning_rate": 4.005e-07, + "num_tokens": 817860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5591813027858734e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6025, + "step": 1205 + }, + { + "loss": 0.0, + "grad_norm": 0.0009856430115178227, + "learning_rate": 4e-07, + "num_tokens": 818226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5589950382709503e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.603, + "step": 1206 + }, + { + "loss": 0.0, + "grad_norm": 0.9632642865180969, + "learning_rate": 3.995e-07, + "num_tokens": 819122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5895000100135803, + "rewards/environment_reward_verifier/std": 0.2976919412612915, + "reward": 0.5895000100135803, + "reward_std": 0.2976919412612915, + "kl": 7.927417755126953e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6035, + "step": 1207 + }, + { + "loss": 0.0, + "grad_norm": 0.7225797772407532, + "learning_rate": 3.99e-07, + "num_tokens": 820018.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8004999756813049, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.8004999756813049, + "reward_std": 0.04879037290811539, + "kl": 3.618467599153519e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.604, + "step": 1208 + }, + { + "loss": 0.0, + "grad_norm": 0.0005820510559715331, + "learning_rate": 3.9850000000000003e-07, + "num_tokens": 820384.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.506747841835022e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6045, + "step": 1209 + }, + { + "loss": 0.0, + "grad_norm": 0.11246080696582794, + "learning_rate": 3.98e-07, + "num_tokens": 821280.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.0006216149777173996, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.605, + "step": 1210 + }, + { + "loss": 0.0, + "grad_norm": 0.0008536215755157173, + "learning_rate": 3.975e-07, + "num_tokens": 822176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.647804260253906e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6055, + "step": 1211 + }, + { + "loss": 0.0, + "grad_norm": 0.8368681073188782, + "learning_rate": 3.97e-07, + "num_tokens": 823072.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 6.206240504980087e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.606, + "step": 1212 + }, + { + "loss": 0.0, + "grad_norm": 0.0013144423719495535, + "learning_rate": 3.965e-07, + "num_tokens": 823438.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.236958920955658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6065, + "step": 1213 + }, + { + "loss": 0.0, + "grad_norm": 0.0006823380826972425, + "learning_rate": 3.96e-07, + "num_tokens": 823804.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3760832846164703e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.607, + "step": 1214 + }, + { + "loss": 0.0, + "grad_norm": 1.1030247211456299, + "learning_rate": 3.955e-07, + "num_tokens": 824700.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 6.19012862443924e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6075, + "step": 1215 + }, + { + "loss": 0.0, + "grad_norm": 1.477575659751892, + "learning_rate": 3.95e-07, + "num_tokens": 825596.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8264999985694885, + "rewards/environment_reward_verifier/std": 0.004949725698679686, + "reward": 0.8264999985694885, + "reward_std": 0.004949725698679686, + "kl": 4.018470644950867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.608, + "step": 1216 + }, + { + "loss": 0.0, + "grad_norm": 3.0342001914978027, + "learning_rate": 3.945e-07, + "num_tokens": 826492.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.846500039100647, + "rewards/environment_reward_verifier/std": 0.014849219471216202, + "reward": 0.846500039100647, + "reward_std": 0.014849220402538776, + "kl": 0.0002557104453444481, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6085, + "step": 1217 + }, + { + "loss": -0.0, + "grad_norm": 1.7365775108337402, + "learning_rate": 3.94e-07, + "num_tokens": 827388.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 0.0005983030423521996, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.609, + "step": 1218 + }, + { + "loss": 0.0, + "grad_norm": 0.0015003138687461615, + "learning_rate": 3.935e-07, + "num_tokens": 828284.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 3.05837020277977e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6095, + "step": 1219 + }, + { + "loss": 0.0, + "grad_norm": 0.0006942595937289298, + "learning_rate": 3.93e-07, + "num_tokens": 828650.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.819392830133438e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.61, + "step": 1220 + }, + { + "loss": 0.0, + "grad_norm": 1.2102298736572266, + "learning_rate": 3.925e-07, + "num_tokens": 829546.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 8.058547973632812e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6105, + "step": 1221 + }, + { + "loss": 0.0, + "grad_norm": 0.002410503104329109, + "learning_rate": 3.92e-07, + "num_tokens": 829912.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7735717594623566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.611, + "step": 1222 + }, + { + "loss": 0.0, + "grad_norm": 0.5362751483917236, + "learning_rate": 3.915e-07, + "num_tokens": 830808.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5895000100135803, + "rewards/environment_reward_verifier/std": 0.2976919412612915, + "reward": 0.5895000100135803, + "reward_std": 0.2976919412612915, + "kl": 4.956033080816269e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6115, + "step": 1223 + }, + { + "loss": 0.0, + "grad_norm": 0.942923903465271, + "learning_rate": 3.91e-07, + "num_tokens": 831704.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 8.915457874536514e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.612, + "step": 1224 + }, + { + "loss": 0.0, + "grad_norm": 0.002524598268792033, + "learning_rate": 3.905e-07, + "num_tokens": 832070.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.547236651182175e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6125, + "step": 1225 + }, + { + "loss": 0.0, + "grad_norm": 0.7344366908073425, + "learning_rate": 3.8999999999999997e-07, + "num_tokens": 832966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 2.895202487707138e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.613, + "step": 1226 + }, + { + "loss": 0.0, + "grad_norm": 0.0006395566160790622, + "learning_rate": 3.895e-07, + "num_tokens": 833332.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4780631065368652e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6135, + "step": 1227 + }, + { + "loss": 0.0, + "grad_norm": 0.005058986134827137, + "learning_rate": 3.89e-07, + "num_tokens": 834228.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 6.9446861743927e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.614, + "step": 1228 + }, + { + "loss": 0.0, + "grad_norm": 0.0012920841109007597, + "learning_rate": 3.885e-07, + "num_tokens": 834594.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.587322473526001e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6145, + "step": 1229 + }, + { + "loss": 0.0, + "grad_norm": 0.0007255738019011915, + "learning_rate": 3.88e-07, + "num_tokens": 834960.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.073643893003464e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.615, + "step": 1230 + }, + { + "loss": 0.0, + "grad_norm": 0.0010118153877556324, + "learning_rate": 3.875e-07, + "num_tokens": 835856.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 5.720555782318115e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6155, + "step": 1231 + }, + { + "loss": 0.0, + "grad_norm": 0.9696030616760254, + "learning_rate": 3.87e-07, + "num_tokens": 836752.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 5.519948899745941e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.616, + "step": 1232 + }, + { + "loss": 0.0, + "grad_norm": 0.0008281389600597322, + "learning_rate": 3.8649999999999997e-07, + "num_tokens": 837648.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.955978900194168e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6165, + "step": 1233 + }, + { + "loss": 0.0, + "grad_norm": 0.000896997342351824, + "learning_rate": 3.86e-07, + "num_tokens": 838014.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.720579504966736e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.617, + "step": 1234 + }, + { + "loss": 0.0, + "grad_norm": 0.8454764485359192, + "learning_rate": 3.855e-07, + "num_tokens": 838910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6130000352859497, + "rewards/environment_reward_verifier/std": 0.33516865968704224, + "reward": 0.6130000352859497, + "reward_std": 0.33516862988471985, + "kl": 2.8034672141075134e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6175, + "step": 1235 + }, + { + "loss": 0.0, + "grad_norm": 2.5553829669952393, + "learning_rate": 3.8499999999999997e-07, + "num_tokens": 839806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.0008981227874755859, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.618, + "step": 1236 + }, + { + "loss": 0.0, + "grad_norm": 0.0028249912429600954, + "learning_rate": 3.845e-07, + "num_tokens": 840172.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.781115472316742e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6185, + "step": 1237 + }, + { + "loss": 0.0, + "grad_norm": 0.8872079849243164, + "learning_rate": 3.84e-07, + "num_tokens": 841068.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6104999780654907, + "rewards/environment_reward_verifier/std": 0.32173359394073486, + "reward": 0.6104999780654907, + "reward_std": 0.32173359394073486, + "kl": 3.669038414955139e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.619, + "step": 1238 + }, + { + "loss": -0.0, + "grad_norm": 1.1121773719787598, + "learning_rate": 3.835e-07, + "num_tokens": 841964.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7914999723434448, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.7914999723434448, + "reward_std": 0.012020829133689404, + "kl": 4.011392593383789e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6195, + "step": 1239 + }, + { + "loss": 0.0, + "grad_norm": 0.8808300495147705, + "learning_rate": 3.83e-07, + "num_tokens": 842860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 5.278363823890686e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.62, + "step": 1240 + }, + { + "loss": 0.0, + "grad_norm": 0.0008536277455277741, + "learning_rate": 3.825e-07, + "num_tokens": 843226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.142786979675293e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6205, + "step": 1241 + }, + { + "loss": 0.0, + "grad_norm": 0.00196442031301558, + "learning_rate": 3.82e-07, + "num_tokens": 844122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 6.778724491596222e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.621, + "step": 1242 + }, + { + "loss": 0.0, + "grad_norm": 1.1811593770980835, + "learning_rate": 3.8149999999999997e-07, + "num_tokens": 845018.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 9.287428110837936e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6215, + "step": 1243 + }, + { + "loss": 0.0, + "grad_norm": 2.1052486896514893, + "learning_rate": 3.81e-07, + "num_tokens": 845914.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00012909993529319763, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.622, + "step": 1244 + }, + { + "loss": 0.0, + "grad_norm": 0.0007280511781573296, + "learning_rate": 3.805e-07, + "num_tokens": 846810.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 4.291161894798279e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6225, + "step": 1245 + }, + { + "loss": 0.0, + "grad_norm": 0.0009892369853332639, + "learning_rate": 3.7999999999999996e-07, + "num_tokens": 847706.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.4899992644786835e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.623, + "step": 1246 + }, + { + "loss": 0.0, + "grad_norm": 1.2615931034088135, + "learning_rate": 3.795e-07, + "num_tokens": 848602.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 0.00013742130249738693, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6235, + "step": 1247 + }, + { + "loss": 0.0, + "grad_norm": 0.9772652983665466, + "learning_rate": 3.79e-07, + "num_tokens": 849498.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 6.359443068504333e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.624, + "step": 1248 + }, + { + "loss": 0.0, + "grad_norm": 0.0010019529145210981, + "learning_rate": 3.785e-07, + "num_tokens": 850394.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 3.528129309415817e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6245, + "step": 1249 + }, + { + "loss": 0.0, + "grad_norm": 0.001229120884090662, + "learning_rate": 3.7799999999999997e-07, + "num_tokens": 850760.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.002785474061966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.625, + "step": 1250 + }, + { + "loss": 0.0, + "grad_norm": 0.002709547057747841, + "learning_rate": 3.775e-07, + "num_tokens": 851126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.825034946203232e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6255, + "step": 1251 + }, + { + "loss": 0.0, + "grad_norm": 0.0007558225770480931, + "learning_rate": 3.77e-07, + "num_tokens": 852022.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.405194729566574e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.626, + "step": 1252 + }, + { + "loss": 0.0, + "grad_norm": 0.0007477627950720489, + "learning_rate": 3.7649999999999996e-07, + "num_tokens": 852388.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9467977583408356e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6265, + "step": 1253 + }, + { + "loss": 0.0, + "grad_norm": 0.641973614692688, + "learning_rate": 3.76e-07, + "num_tokens": 853284.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 2.405419945716858e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.627, + "step": 1254 + }, + { + "loss": 0.0, + "grad_norm": 0.0008768303669057786, + "learning_rate": 3.755e-07, + "num_tokens": 854180.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.962963819503784e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6275, + "step": 1255 + }, + { + "loss": 0.0, + "grad_norm": 0.001349854632280767, + "learning_rate": 3.75e-07, + "num_tokens": 855076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 3.5919249057769775e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.628, + "step": 1256 + }, + { + "loss": 0.0, + "grad_norm": 0.967917799949646, + "learning_rate": 3.7449999999999997e-07, + "num_tokens": 855972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 5.0412025302648544e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6285, + "step": 1257 + }, + { + "loss": 0.0, + "grad_norm": 0.001075277803465724, + "learning_rate": 3.74e-07, + "num_tokens": 856338.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.575347363948822e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.629, + "step": 1258 + }, + { + "loss": 0.0, + "grad_norm": 0.0008712686831131577, + "learning_rate": 3.735e-07, + "num_tokens": 857234.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.816800355911255e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6295, + "step": 1259 + }, + { + "loss": 0.0, + "grad_norm": 0.5931232571601868, + "learning_rate": 3.7299999999999997e-07, + "num_tokens": 858130.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 5.093403160572052e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.63, + "step": 1260 + }, + { + "loss": 0.0, + "grad_norm": 0.002584398491308093, + "learning_rate": 3.725e-07, + "num_tokens": 859026.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 6.108544766902924e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6305, + "step": 1261 + }, + { + "loss": 0.0, + "grad_norm": 0.6407532095909119, + "learning_rate": 3.72e-07, + "num_tokens": 859922.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.1507574021816254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.631, + "step": 1262 + }, + { + "loss": 0.0, + "grad_norm": 0.0005580906290560961, + "learning_rate": 3.7149999999999996e-07, + "num_tokens": 860818.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9365142583847046e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6315, + "step": 1263 + }, + { + "loss": 0.0, + "grad_norm": 0.0007866480154916644, + "learning_rate": 3.71e-07, + "num_tokens": 861714.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 2.9120594263076782e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.632, + "step": 1264 + }, + { + "loss": 0.0, + "grad_norm": 0.00023025991686154157, + "learning_rate": 3.705e-07, + "num_tokens": 862080.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.134411811828613e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6325, + "step": 1265 + }, + { + "loss": 0.0, + "grad_norm": 0.0007495736936107278, + "learning_rate": 3.7e-07, + "num_tokens": 862446.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.528168261051178e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.633, + "step": 1266 + }, + { + "loss": 0.0, + "grad_norm": 0.0012470403453335166, + "learning_rate": 3.6949999999999997e-07, + "num_tokens": 862812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.004035145044327e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6335, + "step": 1267 + }, + { + "loss": 0.0, + "grad_norm": 0.00143651501275599, + "learning_rate": 3.69e-07, + "num_tokens": 863178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.211735308170319e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.634, + "step": 1268 + }, + { + "loss": -0.0, + "grad_norm": 0.5546659231185913, + "learning_rate": 3.685e-07, + "num_tokens": 864074.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 2.325884997844696e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6345, + "step": 1269 + }, + { + "loss": 0.0, + "grad_norm": 0.6545803546905518, + "learning_rate": 3.6799999999999996e-07, + "num_tokens": 864970.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5734999775886536, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5734999775886536, + "reward_std": 0.27082186937332153, + "kl": 3.8314610719680786e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.635, + "step": 1270 + }, + { + "loss": 0.0, + "grad_norm": 0.000768592581152916, + "learning_rate": 3.675e-07, + "num_tokens": 865866.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 4.2659230530261993e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6355, + "step": 1271 + }, + { + "loss": 0.0, + "grad_norm": 0.005816725082695484, + "learning_rate": 3.67e-07, + "num_tokens": 866232.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.577186286449432e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.636, + "step": 1272 + }, + { + "loss": 0.0, + "grad_norm": 0.0009579506004229188, + "learning_rate": 3.6649999999999995e-07, + "num_tokens": 867128.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.569386899471283e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6365, + "step": 1273 + }, + { + "loss": 0.0, + "grad_norm": 0.000599819584749639, + "learning_rate": 3.6599999999999997e-07, + "num_tokens": 867494.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6275403797626495e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.637, + "step": 1274 + }, + { + "loss": 0.0, + "grad_norm": 0.003153608413413167, + "learning_rate": 3.655e-07, + "num_tokens": 867860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.91218301653862e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6375, + "step": 1275 + }, + { + "loss": 0.0, + "grad_norm": 0.0011011279420927167, + "learning_rate": 3.65e-07, + "num_tokens": 868226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.7239864468574524e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.638, + "step": 1276 + }, + { + "loss": 0.0, + "grad_norm": 0.000460358482087031, + "learning_rate": 3.6449999999999996e-07, + "num_tokens": 869122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 2.530403435230255e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6385, + "step": 1277 + }, + { + "loss": 0.0, + "grad_norm": 0.0006261324742808938, + "learning_rate": 3.64e-07, + "num_tokens": 869488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.293381839990616e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.639, + "step": 1278 + }, + { + "loss": 0.0, + "grad_norm": 0.00068364676553756, + "learning_rate": 3.635e-07, + "num_tokens": 869854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2297725081443787e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6395, + "step": 1279 + }, + { + "loss": 0.0, + "grad_norm": 0.0014128347393125296, + "learning_rate": 3.6299999999999995e-07, + "num_tokens": 870220.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.2020190954208374e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.64, + "step": 1280 + }, + { + "loss": 0.0, + "grad_norm": 0.9464602470397949, + "learning_rate": 3.6249999999999997e-07, + "num_tokens": 871116.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7875000238418579, + "rewards/environment_reward_verifier/std": 0.05020460858941078, + "reward": 0.7875000238418579, + "reward_std": 0.05020460858941078, + "kl": 3.541354089975357e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6405, + "step": 1281 + }, + { + "loss": 0.0, + "grad_norm": 0.06001497805118561, + "learning_rate": 3.62e-07, + "num_tokens": 872012.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.0008651353418827057, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.641, + "step": 1282 + }, + { + "loss": 0.0, + "grad_norm": 0.0007043189834803343, + "learning_rate": 3.6149999999999995e-07, + "num_tokens": 872378.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.782978117465973e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6415, + "step": 1283 + }, + { + "loss": 0.0, + "grad_norm": 0.0026320756878703833, + "learning_rate": 3.6099999999999996e-07, + "num_tokens": 872744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.329004049301147e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.642, + "step": 1284 + }, + { + "loss": 0.0, + "grad_norm": 0.6783477067947388, + "learning_rate": 3.605e-07, + "num_tokens": 873640.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.6607420295476913e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6425, + "step": 1285 + }, + { + "loss": 0.0, + "grad_norm": 0.0010286318138241768, + "learning_rate": 3.6e-07, + "num_tokens": 874006.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.1649524569511414e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.643, + "step": 1286 + }, + { + "loss": 0.0, + "grad_norm": 1.2441000938415527, + "learning_rate": 3.5949999999999996e-07, + "num_tokens": 874902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 8.106417953968048e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6435, + "step": 1287 + }, + { + "loss": 0.0, + "grad_norm": 0.005106752272695303, + "learning_rate": 3.5899999999999997e-07, + "num_tokens": 875798.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00012571550905704498, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.644, + "step": 1288 + }, + { + "loss": 0.0, + "grad_norm": 1.1743097305297852, + "learning_rate": 3.585e-07, + "num_tokens": 876694.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8264999985694885, + "rewards/environment_reward_verifier/std": 0.004949725698679686, + "reward": 0.8264999985694885, + "reward_std": 0.004949725698679686, + "kl": 6.488896906375885e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6445, + "step": 1289 + }, + { + "loss": 0.0, + "grad_norm": 0.9160370826721191, + "learning_rate": 3.5799999999999995e-07, + "num_tokens": 877590.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 6.76717609167099e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.645, + "step": 1290 + }, + { + "loss": 0.0, + "grad_norm": 0.0009755863575264812, + "learning_rate": 3.5749999999999997e-07, + "num_tokens": 877956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.708316504955292e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6455, + "step": 1291 + }, + { + "loss": 0.0, + "grad_norm": 1.0256574153900146, + "learning_rate": 3.57e-07, + "num_tokens": 878852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7944999933242798, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7944999933242798, + "reward_std": 0.0502045676112175, + "kl": 6.704498082399368e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.646, + "step": 1292 + }, + { + "loss": 0.0, + "grad_norm": 0.0010145347332581878, + "learning_rate": 3.5649999999999994e-07, + "num_tokens": 879218.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.818011075258255e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6465, + "step": 1293 + }, + { + "loss": 0.0, + "grad_norm": 0.0009893701644614339, + "learning_rate": 3.5599999999999996e-07, + "num_tokens": 879584.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.8242898881435394e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.647, + "step": 1294 + }, + { + "loss": 0.0, + "grad_norm": 0.0009004553430713713, + "learning_rate": 3.555e-07, + "num_tokens": 880480.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 3.14861536026001e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6475, + "step": 1295 + }, + { + "loss": 0.0, + "grad_norm": 0.0008759471238590777, + "learning_rate": 3.55e-07, + "num_tokens": 880846.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.798492252826691e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.648, + "step": 1296 + }, + { + "loss": 0.0, + "grad_norm": 0.0013422233751043677, + "learning_rate": 3.5449999999999995e-07, + "num_tokens": 881212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.2491981983184814e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6485, + "step": 1297 + }, + { + "loss": 0.0, + "grad_norm": 0.004376707598567009, + "learning_rate": 3.5399999999999997e-07, + "num_tokens": 882108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 4.7217123210430145e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.649, + "step": 1298 + }, + { + "loss": -0.0, + "grad_norm": 1.0538861751556396, + "learning_rate": 3.535e-07, + "num_tokens": 883004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.812999963760376, + "rewards/environment_reward_verifier/std": 0.009899493306875229, + "reward": 0.812999963760376, + "reward_std": 0.009899494238197803, + "kl": 6.355904042720795e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6495, + "step": 1299 + }, + { + "loss": 0.0, + "grad_norm": 0.5427396893501282, + "learning_rate": 3.5299999999999994e-07, + "num_tokens": 883900.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.3927539587020874e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.65, + "step": 1300 + }, + { + "loss": 0.0, + "grad_norm": 0.001437443308532238, + "learning_rate": 3.5249999999999996e-07, + "num_tokens": 884796.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 5.222763866186142e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6505, + "step": 1301 + }, + { + "loss": 0.0, + "grad_norm": 0.9306321740150452, + "learning_rate": 3.52e-07, + "num_tokens": 885692.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7879999876022339, + "rewards/environment_reward_verifier/std": 0.05091170594096184, + "reward": 0.7879999876022339, + "reward_std": 0.05091170594096184, + "kl": 8.379947394132614e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.651, + "step": 1302 + }, + { + "loss": 0.0, + "grad_norm": 0.002548660384491086, + "learning_rate": 3.5149999999999994e-07, + "num_tokens": 886058.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.484573870897293e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6515, + "step": 1303 + }, + { + "loss": 0.0, + "grad_norm": 0.8278523683547974, + "learning_rate": 3.5099999999999995e-07, + "num_tokens": 886954.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 6.023421883583069e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.652, + "step": 1304 + }, + { + "loss": 0.0, + "grad_norm": 0.6710245013237, + "learning_rate": 3.5049999999999997e-07, + "num_tokens": 887850.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.4685246646404266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6525, + "step": 1305 + }, + { + "loss": 0.0, + "grad_norm": 0.8050752282142639, + "learning_rate": 3.5e-07, + "num_tokens": 888746.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 5.374569445848465e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.653, + "step": 1306 + }, + { + "loss": 0.0, + "grad_norm": 0.9615032076835632, + "learning_rate": 3.4949999999999995e-07, + "num_tokens": 889642.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 6.828084588050842e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6535, + "step": 1307 + }, + { + "loss": 0.0, + "grad_norm": 0.0010592974722385406, + "learning_rate": 3.4899999999999996e-07, + "num_tokens": 890008.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.003848880529404e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.654, + "step": 1308 + }, + { + "loss": 0.0, + "grad_norm": 0.8069937825202942, + "learning_rate": 3.485e-07, + "num_tokens": 890904.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 7.432699203491211e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6545, + "step": 1309 + }, + { + "loss": 0.0, + "grad_norm": 0.0010740803554654121, + "learning_rate": 3.4799999999999994e-07, + "num_tokens": 891270.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.285760223865509e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.655, + "step": 1310 + }, + { + "loss": 0.0, + "grad_norm": 0.000928595254663378, + "learning_rate": 3.4749999999999996e-07, + "num_tokens": 891636.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.1488947570323944e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6555, + "step": 1311 + }, + { + "loss": 0.0, + "grad_norm": 0.6778450608253479, + "learning_rate": 3.4699999999999997e-07, + "num_tokens": 892532.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8004999756813049, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.8004999756813049, + "reward_std": 0.04879037290811539, + "kl": 3.174692392349243e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.656, + "step": 1312 + }, + { + "loss": 0.0, + "grad_norm": 0.0012175820302218199, + "learning_rate": 3.4649999999999993e-07, + "num_tokens": 893428.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 4.419032484292984e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6565, + "step": 1313 + }, + { + "loss": 0.0, + "grad_norm": 1.2002919912338257, + "learning_rate": 3.4599999999999995e-07, + "num_tokens": 894324.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00012012850493192673, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.657, + "step": 1314 + }, + { + "loss": 0.0, + "grad_norm": 0.0017943575512617826, + "learning_rate": 3.4549999999999996e-07, + "num_tokens": 894690.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.819050759077072e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6575, + "step": 1315 + }, + { + "loss": 0.0, + "grad_norm": 0.8222445845603943, + "learning_rate": 3.45e-07, + "num_tokens": 895586.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 5.055079236626625e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.658, + "step": 1316 + }, + { + "loss": 0.0, + "grad_norm": 0.0006479246076196432, + "learning_rate": 3.4449999999999994e-07, + "num_tokens": 895952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9908493161201477e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6585, + "step": 1317 + }, + { + "loss": 0.0, + "grad_norm": 0.7560232877731323, + "learning_rate": 3.4399999999999996e-07, + "num_tokens": 896848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 6.515160202980042e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.659, + "step": 1318 + }, + { + "loss": 0.0, + "grad_norm": 0.014223476871848106, + "learning_rate": 3.435e-07, + "num_tokens": 897744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00023256801068782806, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6595, + "step": 1319 + }, + { + "loss": 0.0, + "grad_norm": 1.4846367835998535, + "learning_rate": 3.43e-07, + "num_tokens": 898640.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.0004176180809736252, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.66, + "step": 1320 + }, + { + "loss": 0.0, + "grad_norm": 0.0008440379751846194, + "learning_rate": 3.425e-07, + "num_tokens": 899006.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8285197913646698e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6605, + "step": 1321 + }, + { + "loss": 0.0, + "grad_norm": 0.6470924615859985, + "learning_rate": 3.42e-07, + "num_tokens": 899902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8215000033378601, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8215000033378601, + "reward_std": 0.030405579134821892, + "kl": 8.140783756971359e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.661, + "step": 1322 + }, + { + "loss": 0.0, + "grad_norm": 0.7923425436019897, + "learning_rate": 3.4150000000000003e-07, + "num_tokens": 900798.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8034999966621399, + "rewards/environment_reward_verifier/std": 0.004949725698679686, + "reward": 0.8034999966621399, + "reward_std": 0.004949725698679686, + "kl": 6.092153489589691e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6615, + "step": 1323 + }, + { + "loss": 0.0, + "grad_norm": 0.0007985649281181395, + "learning_rate": 3.41e-07, + "num_tokens": 901164.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.0151568353176117e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.662, + "step": 1324 + }, + { + "loss": 0.0, + "grad_norm": 0.6748971343040466, + "learning_rate": 3.405e-07, + "num_tokens": 902060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.894829958677292e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6625, + "step": 1325 + }, + { + "loss": 0.0, + "grad_norm": 0.7054407000541687, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 902956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.058742731809616e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.663, + "step": 1326 + }, + { + "loss": 0.0, + "grad_norm": 0.00041221315041184425, + "learning_rate": 3.395e-07, + "num_tokens": 903852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.8746592104434967e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6635, + "step": 1327 + }, + { + "loss": 0.0, + "grad_norm": 0.038646597415208817, + "learning_rate": 3.39e-07, + "num_tokens": 904748.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 0.00044205132871866226, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.664, + "step": 1328 + }, + { + "loss": 0.0, + "grad_norm": 0.0008110158960334957, + "learning_rate": 3.385e-07, + "num_tokens": 905114.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.74791294336319e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6645, + "step": 1329 + }, + { + "loss": 0.0, + "grad_norm": 0.7750295400619507, + "learning_rate": 3.38e-07, + "num_tokens": 906010.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 4.74732369184494e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.665, + "step": 1330 + }, + { + "loss": 0.0, + "grad_norm": 0.0005337664624676108, + "learning_rate": 3.375e-07, + "num_tokens": 906906.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.6640092730522156e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6655, + "step": 1331 + }, + { + "loss": 0.0, + "grad_norm": 0.0010131035232916474, + "learning_rate": 3.37e-07, + "num_tokens": 907802.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 3.915652632713318e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.666, + "step": 1332 + }, + { + "loss": 0.0, + "grad_norm": 0.7440443634986877, + "learning_rate": 3.3650000000000003e-07, + "num_tokens": 908698.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 2.2001564502716064e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6665, + "step": 1333 + }, + { + "loss": 0.0, + "grad_norm": 0.0008754681330174208, + "learning_rate": 3.36e-07, + "num_tokens": 909064.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.6763806343078613e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.667, + "step": 1334 + }, + { + "loss": 0.0, + "grad_norm": 0.0007677595713175833, + "learning_rate": 3.355e-07, + "num_tokens": 909430.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.990197390317917e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6675, + "step": 1335 + }, + { + "loss": 0.0, + "grad_norm": 0.0044853463768959045, + "learning_rate": 3.35e-07, + "num_tokens": 910326.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.878000020980835, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.878000020980835, + "reward_std": 0.0, + "kl": 0.00011534057557582855, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.668, + "step": 1336 + }, + { + "loss": 0.0, + "grad_norm": 0.0005815306794829667, + "learning_rate": 3.345e-07, + "num_tokens": 910692.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.8213875591754913e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6685, + "step": 1337 + }, + { + "loss": 0.0, + "grad_norm": 0.000703338417224586, + "learning_rate": 3.34e-07, + "num_tokens": 911058.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.180932253599167e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.669, + "step": 1338 + }, + { + "loss": 0.0, + "grad_norm": 0.7522983551025391, + "learning_rate": 3.335e-07, + "num_tokens": 911954.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.4965574741363525e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6695, + "step": 1339 + }, + { + "loss": 0.0, + "grad_norm": 0.0038247250486165285, + "learning_rate": 3.33e-07, + "num_tokens": 912850.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.000109134241938591, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.67, + "step": 1340 + }, + { + "loss": 0.0, + "grad_norm": 0.8478634357452393, + "learning_rate": 3.325e-07, + "num_tokens": 913746.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 4.1466206312179565e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6705, + "step": 1341 + }, + { + "loss": 0.0, + "grad_norm": 0.9138993620872498, + "learning_rate": 3.32e-07, + "num_tokens": 914642.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6004999876022339, + "rewards/environment_reward_verifier/std": 0.3090056777000427, + "reward": 0.6004999876022339, + "reward_std": 0.3090056777000427, + "kl": 8.696969598531723e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.671, + "step": 1342 + }, + { + "loss": 0.0, + "grad_norm": 0.0021632679272443056, + "learning_rate": 3.315e-07, + "num_tokens": 915008.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.276656985282898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6715, + "step": 1343 + }, + { + "loss": -0.0, + "grad_norm": 0.7756864428520203, + "learning_rate": 3.31e-07, + "num_tokens": 915904.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7914999723434448, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.7914999723434448, + "reward_std": 0.012020829133689404, + "kl": 3.759749233722687e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.672, + "step": 1344 + }, + { + "loss": 0.0, + "grad_norm": 0.7610845565795898, + "learning_rate": 3.305e-07, + "num_tokens": 916800.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8270000219345093, + "rewards/environment_reward_verifier/std": 0.01131368987262249, + "reward": 0.8270000219345093, + "reward_std": 0.011313688941299915, + "kl": 2.3875385522842407e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6725, + "step": 1345 + }, + { + "loss": 0.0, + "grad_norm": 0.004521695431321859, + "learning_rate": 3.3e-07, + "num_tokens": 917696.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 6.487127393484116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.673, + "step": 1346 + }, + { + "loss": 0.0, + "grad_norm": 1.1814557313919067, + "learning_rate": 3.295e-07, + "num_tokens": 918592.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.055154334753751755, + "reward": 0.8389999866485596, + "reward_std": 0.055154334753751755, + "kl": 3.372412174940109e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6735, + "step": 1347 + }, + { + "loss": 0.0, + "grad_norm": 0.7761304974555969, + "learning_rate": 3.29e-07, + "num_tokens": 919488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 6.966851651668549e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.674, + "step": 1348 + }, + { + "loss": 0.0, + "grad_norm": 0.001064626849256456, + "learning_rate": 3.285e-07, + "num_tokens": 919854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.1544437408447266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6745, + "step": 1349 + }, + { + "loss": 0.0, + "grad_norm": 0.001295957830734551, + "learning_rate": 3.28e-07, + "num_tokens": 920220.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0192936062812805e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.675, + "step": 1350 + }, + { + "loss": 0.0, + "grad_norm": 0.001216788194142282, + "learning_rate": 3.275e-07, + "num_tokens": 920586.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.43743371963501e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6755, + "step": 1351 + }, + { + "loss": 0.0, + "grad_norm": 0.0005596580449491739, + "learning_rate": 3.27e-07, + "num_tokens": 920952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.292310819029808e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.676, + "step": 1352 + }, + { + "loss": 0.0, + "grad_norm": 0.0016285229939967394, + "learning_rate": 3.265e-07, + "num_tokens": 921848.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9882026612758636e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6765, + "step": 1353 + }, + { + "loss": 0.0, + "grad_norm": 0.7587524652481079, + "learning_rate": 3.26e-07, + "num_tokens": 922744.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 2.8314068913459778e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.677, + "step": 1354 + }, + { + "loss": 0.0, + "grad_norm": 0.0019900077022612095, + "learning_rate": 3.255e-07, + "num_tokens": 923110.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.9114227294921875e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6775, + "step": 1355 + }, + { + "loss": 0.0, + "grad_norm": 0.5896979570388794, + "learning_rate": 3.25e-07, + "num_tokens": 924006.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.6628375053405762e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.678, + "step": 1356 + }, + { + "loss": 0.0, + "grad_norm": 0.0011802142253145576, + "learning_rate": 3.245e-07, + "num_tokens": 924372.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.596449434757233e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6785, + "step": 1357 + }, + { + "loss": 0.0, + "grad_norm": 0.0010036288294941187, + "learning_rate": 3.24e-07, + "num_tokens": 924738.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.07282093167305e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.679, + "step": 1358 + }, + { + "loss": 0.0, + "grad_norm": 0.0028521367348730564, + "learning_rate": 3.235e-07, + "num_tokens": 925634.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.950219929218292e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6795, + "step": 1359 + }, + { + "loss": 0.0, + "grad_norm": 0.016494104638695717, + "learning_rate": 3.23e-07, + "num_tokens": 926530.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 0.00013456307351589203, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.68, + "step": 1360 + }, + { + "loss": 0.0, + "grad_norm": 0.004497945308685303, + "learning_rate": 3.225e-07, + "num_tokens": 927426.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.94649463891983e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6805, + "step": 1361 + }, + { + "loss": 0.0, + "grad_norm": 0.0003344974829815328, + "learning_rate": 3.22e-07, + "num_tokens": 927792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.6856938600540161e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.681, + "step": 1362 + }, + { + "loss": 0.0, + "grad_norm": 0.0010008744429796934, + "learning_rate": 3.215e-07, + "num_tokens": 928158.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.93684783577919e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6815, + "step": 1363 + }, + { + "loss": 0.0, + "grad_norm": 0.001206480897963047, + "learning_rate": 3.21e-07, + "num_tokens": 928524.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.2152201533317566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.682, + "step": 1364 + }, + { + "loss": 0.0, + "grad_norm": 0.0016773812239989638, + "learning_rate": 3.205e-07, + "num_tokens": 929420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.3534284234046936e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6825, + "step": 1365 + }, + { + "loss": 0.0, + "grad_norm": 0.8313549160957336, + "learning_rate": 3.2e-07, + "num_tokens": 930316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 8.157175034284592e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.683, + "step": 1366 + }, + { + "loss": 0.0, + "grad_norm": 0.001157211372628808, + "learning_rate": 3.1949999999999997e-07, + "num_tokens": 930682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.2526982724666595e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6835, + "step": 1367 + }, + { + "loss": 0.0, + "grad_norm": 0.0008214963017962873, + "learning_rate": 3.19e-07, + "num_tokens": 931578.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8560000061988831, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8560000061988831, + "reward_std": 0.0, + "kl": 4.2312778532505035e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.684, + "step": 1368 + }, + { + "loss": 0.0, + "grad_norm": 0.6024468541145325, + "learning_rate": 3.185e-07, + "num_tokens": 932474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 4.24971804022789e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6845, + "step": 1369 + }, + { + "loss": 0.0, + "grad_norm": 0.001222139224410057, + "learning_rate": 3.18e-07, + "num_tokens": 932840.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.3324194848537445e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.685, + "step": 1370 + }, + { + "loss": 0.0, + "grad_norm": 0.8489810824394226, + "learning_rate": 3.175e-07, + "num_tokens": 933736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 6.651133298873901e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6855, + "step": 1371 + }, + { + "loss": 0.0, + "grad_norm": 1.011709213256836, + "learning_rate": 3.17e-07, + "num_tokens": 934632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 0.00015988852828741074, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.686, + "step": 1372 + }, + { + "loss": 0.0, + "grad_norm": 0.0012633471051231027, + "learning_rate": 3.165e-07, + "num_tokens": 935528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.4710544645786285e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6865, + "step": 1373 + }, + { + "loss": 0.0, + "grad_norm": 0.6183916330337524, + "learning_rate": 3.1599999999999997e-07, + "num_tokens": 936424.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 2.9399991035461426e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.687, + "step": 1374 + }, + { + "loss": 0.0, + "grad_norm": 0.01003769040107727, + "learning_rate": 3.155e-07, + "num_tokens": 937320.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 0.00016684457659721375, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6875, + "step": 1375 + }, + { + "loss": 0.0, + "grad_norm": 0.0010148925939574838, + "learning_rate": 3.15e-07, + "num_tokens": 937686.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9999762773513794e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.688, + "step": 1376 + }, + { + "loss": 0.0, + "grad_norm": 0.001714242622256279, + "learning_rate": 3.1449999999999996e-07, + "num_tokens": 938582.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.853470742702484e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6885, + "step": 1377 + }, + { + "loss": 0.0, + "grad_norm": 0.5588313341140747, + "learning_rate": 3.14e-07, + "num_tokens": 939478.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.02687004767358303, + "reward": 0.8009999990463257, + "reward_std": 0.02687004767358303, + "kl": 1.4209188520908356e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.689, + "step": 1378 + }, + { + "loss": 0.0, + "grad_norm": 0.000599015795160085, + "learning_rate": 3.135e-07, + "num_tokens": 939844.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7828849852085114e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6895, + "step": 1379 + }, + { + "loss": 0.0, + "grad_norm": 0.5653384923934937, + "learning_rate": 3.13e-07, + "num_tokens": 940740.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 5.6372955441474915e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.69, + "step": 1380 + }, + { + "loss": 0.0, + "grad_norm": 0.6871844530105591, + "learning_rate": 3.1249999999999997e-07, + "num_tokens": 941636.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31607675552368164, + "reward": 0.5995000004768372, + "reward_std": 0.31607675552368164, + "kl": 3.4996308386325836e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6905, + "step": 1381 + }, + { + "loss": 0.0, + "grad_norm": 0.000714326451998204, + "learning_rate": 3.12e-07, + "num_tokens": 942002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4284236133098602e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.691, + "step": 1382 + }, + { + "loss": 0.0, + "grad_norm": 1.0217498540878296, + "learning_rate": 3.115e-07, + "num_tokens": 942898.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 6.504356861114502e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6915, + "step": 1383 + }, + { + "loss": 0.0, + "grad_norm": 0.9927207231521606, + "learning_rate": 3.1099999999999997e-07, + "num_tokens": 943794.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 5.958974361419678e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.692, + "step": 1384 + }, + { + "loss": 0.0, + "grad_norm": 0.0008056789520196617, + "learning_rate": 3.105e-07, + "num_tokens": 944160.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.547128289937973e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6925, + "step": 1385 + }, + { + "loss": 0.0, + "grad_norm": 0.7982547879219055, + "learning_rate": 3.1e-07, + "num_tokens": 945056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8400000333786011, + "rewards/environment_reward_verifier/std": 0.056568533182144165, + "reward": 0.8400000333786011, + "reward_std": 0.056568533182144165, + "kl": 2.9597431421279907e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.693, + "step": 1386 + }, + { + "loss": 0.0, + "grad_norm": 0.001857105758972466, + "learning_rate": 3.0949999999999996e-07, + "num_tokens": 945422.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.553755909204483e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6935, + "step": 1387 + }, + { + "loss": 0.0, + "grad_norm": 0.0009268614230677485, + "learning_rate": 3.09e-07, + "num_tokens": 945788.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6863068342208862e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.694, + "step": 1388 + }, + { + "loss": 0.0, + "grad_norm": 0.010713160037994385, + "learning_rate": 3.085e-07, + "num_tokens": 946154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.249895811080933e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6945, + "step": 1389 + }, + { + "loss": 0.0, + "grad_norm": 0.0006943625630810857, + "learning_rate": 3.08e-07, + "num_tokens": 946520.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.0948780477046967e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.695, + "step": 1390 + }, + { + "loss": 0.0, + "grad_norm": 0.0005994713283143938, + "learning_rate": 3.0749999999999997e-07, + "num_tokens": 946886.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.208965063095093e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6955, + "step": 1391 + }, + { + "loss": 0.0, + "grad_norm": 0.0005941269919276237, + "learning_rate": 3.07e-07, + "num_tokens": 947782.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.443937748670578e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.696, + "step": 1392 + }, + { + "loss": 0.0, + "grad_norm": 0.0016281341668218374, + "learning_rate": 3.065e-07, + "num_tokens": 948678.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 5.4708682000637054e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6965, + "step": 1393 + }, + { + "loss": 0.0, + "grad_norm": 0.0008499264949932694, + "learning_rate": 3.0599999999999996e-07, + "num_tokens": 949044.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.64379957318306e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.697, + "step": 1394 + }, + { + "loss": 0.0, + "grad_norm": 0.8996263146400452, + "learning_rate": 3.055e-07, + "num_tokens": 949940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 7.260870188474655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6975, + "step": 1395 + }, + { + "loss": 0.0, + "grad_norm": 0.001844099722802639, + "learning_rate": 3.05e-07, + "num_tokens": 950836.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 5.3627416491508484e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.698, + "step": 1396 + }, + { + "loss": 0.0, + "grad_norm": 0.6437634229660034, + "learning_rate": 3.0449999999999995e-07, + "num_tokens": 951732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 2.2635795176029205e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6985, + "step": 1397 + }, + { + "loss": 0.0, + "grad_norm": 0.0012192694703117013, + "learning_rate": 3.0399999999999997e-07, + "num_tokens": 952098.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7929432690143585e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.699, + "step": 1398 + }, + { + "loss": 0.0, + "grad_norm": 1.092392921447754, + "learning_rate": 3.035e-07, + "num_tokens": 952994.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 0.00012940727174282074, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.6995, + "step": 1399 + }, + { + "loss": 0.0, + "grad_norm": 0.0012551175896078348, + "learning_rate": 3.03e-07, + "num_tokens": 953360.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.959665238857269e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7, + "step": 1400 + }, + { + "loss": 0.0, + "grad_norm": 0.7426066994667053, + "learning_rate": 3.0249999999999996e-07, + "num_tokens": 954256.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 2.7242116630077362e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7005, + "step": 1401 + }, + { + "loss": 0.0, + "grad_norm": 0.8021246194839478, + "learning_rate": 3.02e-07, + "num_tokens": 955152.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.275927156209946e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.701, + "step": 1402 + }, + { + "loss": 0.0, + "grad_norm": 0.0010526307160034776, + "learning_rate": 3.015e-07, + "num_tokens": 955518.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3847056329250336e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7015, + "step": 1403 + }, + { + "loss": 0.0, + "grad_norm": 0.0008919798419810832, + "learning_rate": 3.0099999999999996e-07, + "num_tokens": 956414.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.5351294577121735e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.702, + "step": 1404 + }, + { + "loss": 0.0, + "grad_norm": 1.9787451028823853, + "learning_rate": 3.0049999999999997e-07, + "num_tokens": 957310.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 5.8368779718875885e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7025, + "step": 1405 + }, + { + "loss": 0.0, + "grad_norm": 0.8678433299064636, + "learning_rate": 3e-07, + "num_tokens": 958206.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.1750649213790894e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.703, + "step": 1406 + }, + { + "loss": 0.0, + "grad_norm": 1.0366160869598389, + "learning_rate": 2.9949999999999995e-07, + "num_tokens": 959102.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 5.751661956310272e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7035, + "step": 1407 + }, + { + "loss": 0.0, + "grad_norm": 1.489668846130371, + "learning_rate": 2.9899999999999996e-07, + "num_tokens": 959998.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 0.00010025408118963242, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.704, + "step": 1408 + }, + { + "loss": 0.0, + "grad_norm": 0.7787015438079834, + "learning_rate": 2.985e-07, + "num_tokens": 960894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 5.357526242733002e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7045, + "step": 1409 + }, + { + "loss": 0.0, + "grad_norm": 0.9409085512161255, + "learning_rate": 2.98e-07, + "num_tokens": 961790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 5.440693348646164e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.705, + "step": 1410 + }, + { + "loss": 0.0, + "grad_norm": 0.0015193913131952286, + "learning_rate": 2.9749999999999996e-07, + "num_tokens": 962686.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8140000104904175, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8140000104904175, + "reward_std": 0.0, + "kl": 6.182864308357239e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7055, + "step": 1411 + }, + { + "loss": 0.0, + "grad_norm": 0.0005187370115891099, + "learning_rate": 2.9699999999999997e-07, + "num_tokens": 963052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.189353108406067e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.706, + "step": 1412 + }, + { + "loss": 0.0, + "grad_norm": 2.2034571170806885, + "learning_rate": 2.965e-07, + "num_tokens": 963948.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8250000476837158, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8250000476837158, + "reward_std": 0.01555635966360569, + "kl": 0.0003419136628508568, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7065, + "step": 1413 + }, + { + "loss": 0.0, + "grad_norm": 0.0008707343367859721, + "learning_rate": 2.9599999999999995e-07, + "num_tokens": 964314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.70638445019722e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.707, + "step": 1414 + }, + { + "loss": -0.0, + "grad_norm": 0.6375908255577087, + "learning_rate": 2.9549999999999997e-07, + "num_tokens": 965210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 3.099162131547928e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7075, + "step": 1415 + }, + { + "loss": 0.0, + "grad_norm": 1.0078327655792236, + "learning_rate": 2.95e-07, + "num_tokens": 966106.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.00013838708400726318, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.708, + "step": 1416 + }, + { + "loss": 0.0, + "grad_norm": 0.003951544873416424, + "learning_rate": 2.945e-07, + "num_tokens": 966472.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 9.117741137742996e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7085, + "step": 1417 + }, + { + "loss": 0.0, + "grad_norm": 0.0012011009966954589, + "learning_rate": 2.9399999999999996e-07, + "num_tokens": 967368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 6.767082959413528e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.709, + "step": 1418 + }, + { + "loss": 0.0, + "grad_norm": 0.0015257024206221104, + "learning_rate": 2.935e-07, + "num_tokens": 967734.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9396265745162964e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7095, + "step": 1419 + }, + { + "loss": 0.0, + "grad_norm": 0.001377312932163477, + "learning_rate": 2.93e-07, + "num_tokens": 968630.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 3.4086406230926514e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.71, + "step": 1420 + }, + { + "loss": 0.0, + "grad_norm": 0.00485027814283967, + "learning_rate": 2.9249999999999995e-07, + "num_tokens": 969526.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00010971631854772568, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7105, + "step": 1421 + }, + { + "loss": 0.0, + "grad_norm": 0.0008110209600999951, + "learning_rate": 2.9199999999999997e-07, + "num_tokens": 969892.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.389533281326294e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.711, + "step": 1422 + }, + { + "loss": -0.0, + "grad_norm": 0.8266608119010925, + "learning_rate": 2.915e-07, + "num_tokens": 970788.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 2.826191484928131e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7115, + "step": 1423 + }, + { + "loss": 0.0, + "grad_norm": 0.00047775241546332836, + "learning_rate": 2.9099999999999995e-07, + "num_tokens": 971684.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3300759494304657e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.712, + "step": 1424 + }, + { + "loss": 0.0, + "grad_norm": 1.2217819690704346, + "learning_rate": 2.9049999999999996e-07, + "num_tokens": 972580.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.288515239953995e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7125, + "step": 1425 + }, + { + "loss": 0.0, + "grad_norm": 0.6611891984939575, + "learning_rate": 2.9e-07, + "num_tokens": 973476.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 4.2975880205631256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.713, + "step": 1426 + }, + { + "loss": 0.0, + "grad_norm": 0.0005366262048482895, + "learning_rate": 2.895e-07, + "num_tokens": 973842.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.587307244539261e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7135, + "step": 1427 + }, + { + "loss": 0.0, + "grad_norm": 0.000767569406889379, + "learning_rate": 2.8899999999999995e-07, + "num_tokens": 974208.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7854926884174347e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.714, + "step": 1428 + }, + { + "loss": 0.0, + "grad_norm": 0.00042317734914831817, + "learning_rate": 2.8849999999999997e-07, + "num_tokens": 975104.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 2.3975037038326263e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7145, + "step": 1429 + }, + { + "loss": 0.0, + "grad_norm": 0.00044755812268704176, + "learning_rate": 2.88e-07, + "num_tokens": 976000.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.5684013962745667e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.715, + "step": 1430 + }, + { + "loss": 0.0, + "grad_norm": 0.0008439691155217588, + "learning_rate": 2.8749999999999995e-07, + "num_tokens": 976366.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.1568499505519867e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7155, + "step": 1431 + }, + { + "loss": 0.0, + "grad_norm": 0.0013360042357817292, + "learning_rate": 2.8699999999999996e-07, + "num_tokens": 976732.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.739702075719833e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.716, + "step": 1432 + }, + { + "loss": 0.0, + "grad_norm": 0.004178944975137711, + "learning_rate": 2.865e-07, + "num_tokens": 977098.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.513351738452911e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7165, + "step": 1433 + }, + { + "loss": 0.0, + "grad_norm": 0.0007262816070578992, + "learning_rate": 2.8599999999999994e-07, + "num_tokens": 977464.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.949777990579605e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.717, + "step": 1434 + }, + { + "loss": 0.0, + "grad_norm": 0.0012204928789287806, + "learning_rate": 2.8549999999999996e-07, + "num_tokens": 977830.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.5828910768032074e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7175, + "step": 1435 + }, + { + "loss": 0.0, + "grad_norm": 0.8220816254615784, + "learning_rate": 2.8499999999999997e-07, + "num_tokens": 978726.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 0.00011288374662399292, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.718, + "step": 1436 + }, + { + "loss": 0.0, + "grad_norm": 0.0007931955042295158, + "learning_rate": 2.845e-07, + "num_tokens": 979092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.172643482685089e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7185, + "step": 1437 + }, + { + "loss": 0.0, + "grad_norm": 1.1544042825698853, + "learning_rate": 2.8399999999999995e-07, + "num_tokens": 979988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8355000019073486, + "reward_std": 0.030405579134821892, + "kl": 7.341429591178894e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.719, + "step": 1438 + }, + { + "loss": 0.0, + "grad_norm": 0.0005520334816537797, + "learning_rate": 2.8349999999999996e-07, + "num_tokens": 980884.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.8331764042377472e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7195, + "step": 1439 + }, + { + "loss": 0.0, + "grad_norm": 0.0004403255879878998, + "learning_rate": 2.83e-07, + "num_tokens": 981250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.0412728190422058e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.72, + "step": 1440 + }, + { + "loss": 0.0, + "grad_norm": 0.7322037220001221, + "learning_rate": 2.8249999999999994e-07, + "num_tokens": 982146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.950243651866913e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7205, + "step": 1441 + }, + { + "loss": 0.0, + "grad_norm": 0.0010377311846241355, + "learning_rate": 2.8199999999999996e-07, + "num_tokens": 982512.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.483425825834274e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.721, + "step": 1442 + }, + { + "loss": 0.0, + "grad_norm": 0.5152266621589661, + "learning_rate": 2.8149999999999997e-07, + "num_tokens": 983408.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 1.6961246728897095e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7215, + "step": 1443 + }, + { + "loss": 0.0, + "grad_norm": 0.004680828657001257, + "learning_rate": 2.8100000000000004e-07, + "num_tokens": 983774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.511714309453964e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.722, + "step": 1444 + }, + { + "loss": 0.0, + "grad_norm": 0.0006535202264785767, + "learning_rate": 2.805e-07, + "num_tokens": 984670.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.348011523485184e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7225, + "step": 1445 + }, + { + "loss": 0.0, + "grad_norm": 0.0008985276799649, + "learning_rate": 2.8e-07, + "num_tokens": 985036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.367103636264801e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.723, + "step": 1446 + }, + { + "loss": 0.0, + "grad_norm": 0.0010757588315755129, + "learning_rate": 2.7950000000000003e-07, + "num_tokens": 985932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 4.555657505989075e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7235, + "step": 1447 + }, + { + "loss": 0.0, + "grad_norm": 0.0008238382870331407, + "learning_rate": 2.79e-07, + "num_tokens": 986298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9938295483589172e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.724, + "step": 1448 + }, + { + "loss": 0.0, + "grad_norm": 0.0008969150367192924, + "learning_rate": 2.785e-07, + "num_tokens": 986664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.353878855705261e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7245, + "step": 1449 + }, + { + "loss": 0.0, + "grad_norm": 0.0009511377429589629, + "learning_rate": 2.7800000000000003e-07, + "num_tokens": 987030.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.065129905939102e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.725, + "step": 1450 + }, + { + "loss": 0.0, + "grad_norm": 0.0007412993581965566, + "learning_rate": 2.775e-07, + "num_tokens": 987396.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.939479261636734e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7255, + "step": 1451 + }, + { + "loss": 0.0, + "grad_norm": 0.0006103027262724936, + "learning_rate": 2.77e-07, + "num_tokens": 987762.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.47051939368248e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.726, + "step": 1452 + }, + { + "loss": 0.0, + "grad_norm": 0.0012461054138839245, + "learning_rate": 2.765e-07, + "num_tokens": 988128.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.908908158540726e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7265, + "step": 1453 + }, + { + "loss": 0.0, + "grad_norm": 0.7985588908195496, + "learning_rate": 2.7600000000000004e-07, + "num_tokens": 989024.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7854999899864197, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7854999899864197, + "reward_std": 0.037476640194654465, + "kl": 4.825275391340256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.727, + "step": 1454 + }, + { + "loss": 0.0, + "grad_norm": 0.0008023115806281567, + "learning_rate": 2.755e-07, + "num_tokens": 989920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.208313137292862e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7275, + "step": 1455 + }, + { + "loss": 0.0, + "grad_norm": 0.0016813237452879548, + "learning_rate": 2.75e-07, + "num_tokens": 990286.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.5924057960510254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.728, + "step": 1456 + }, + { + "loss": 0.0, + "grad_norm": 0.0013601853279396892, + "learning_rate": 2.7450000000000003e-07, + "num_tokens": 990652.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.119200795888901e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7285, + "step": 1457 + }, + { + "loss": 0.0, + "grad_norm": 0.802211344242096, + "learning_rate": 2.74e-07, + "num_tokens": 991548.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.0021212929859757423, + "reward": 0.8335000276565552, + "reward_std": 0.0021212929859757423, + "kl": 5.8710575103759766e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.729, + "step": 1458 + }, + { + "loss": 0.0, + "grad_norm": 0.0022085753735154867, + "learning_rate": 2.735e-07, + "num_tokens": 991914.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.9602782130241394e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7295, + "step": 1459 + }, + { + "loss": 0.0, + "grad_norm": 0.0007408488309010863, + "learning_rate": 2.73e-07, + "num_tokens": 992280.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.52049246430397e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.73, + "step": 1460 + }, + { + "loss": 0.0, + "grad_norm": 0.001600884017534554, + "learning_rate": 2.725e-07, + "num_tokens": 993176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8560000061988831, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8560000061988831, + "reward_std": 0.0, + "kl": 6.529409438371658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7305, + "step": 1461 + }, + { + "loss": 0.0, + "grad_norm": 0.0013077593175694346, + "learning_rate": 2.72e-07, + "num_tokens": 993542.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.763249307870865e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.731, + "step": 1462 + }, + { + "loss": 0.0, + "grad_norm": 0.0006298540392890573, + "learning_rate": 2.715e-07, + "num_tokens": 994438.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 2.8800219297409058e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7315, + "step": 1463 + }, + { + "loss": 0.0, + "grad_norm": 1.1219033002853394, + "learning_rate": 2.7100000000000003e-07, + "num_tokens": 995334.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 0.00019954796880483627, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.732, + "step": 1464 + }, + { + "loss": 0.0, + "grad_norm": 0.0009468385251238942, + "learning_rate": 2.705e-07, + "num_tokens": 996230.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.38199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.38199999928474426, + "reward_std": 0.0, + "kl": 3.767292946577072e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7325, + "step": 1465 + }, + { + "loss": 0.0, + "grad_norm": 0.0015062256716191769, + "learning_rate": 2.7e-07, + "num_tokens": 996596.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.980271190404892e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.733, + "step": 1466 + }, + { + "loss": 0.0, + "grad_norm": 0.000680701807141304, + "learning_rate": 2.695e-07, + "num_tokens": 997492.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 4.730746150016785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7335, + "step": 1467 + }, + { + "loss": 0.0, + "grad_norm": 0.00220138905569911, + "learning_rate": 2.69e-07, + "num_tokens": 997858.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.7437152564525604e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.734, + "step": 1468 + }, + { + "loss": 0.0, + "grad_norm": 0.0007745574112050235, + "learning_rate": 2.685e-07, + "num_tokens": 998754.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.881605178117752e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7345, + "step": 1469 + }, + { + "loss": 0.0, + "grad_norm": 0.7212503552436829, + "learning_rate": 2.68e-07, + "num_tokens": 999650.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.0021212929859757423, + "reward": 0.8335000276565552, + "reward_std": 0.0021212929859757423, + "kl": 0.00011175964027643204, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.735, + "step": 1470 + }, + { + "loss": 0.0, + "grad_norm": 0.7467300295829773, + "learning_rate": 2.675e-07, + "num_tokens": 1000546.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 3.479979932308197e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7355, + "step": 1471 + }, + { + "loss": 0.0, + "grad_norm": 0.0011473192134872079, + "learning_rate": 2.67e-07, + "num_tokens": 1000912.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.285760223865509e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.736, + "step": 1472 + }, + { + "loss": 0.0, + "grad_norm": 0.6855739951133728, + "learning_rate": 2.665e-07, + "num_tokens": 1001808.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.9821880161762238e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7365, + "step": 1473 + }, + { + "loss": 0.0, + "grad_norm": 0.0009315242641605437, + "learning_rate": 2.66e-07, + "num_tokens": 1002174.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.0528753995895386e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.737, + "step": 1474 + }, + { + "loss": 0.0, + "grad_norm": 0.0007502164226025343, + "learning_rate": 2.655e-07, + "num_tokens": 1003070.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 4.344619810581207e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7375, + "step": 1475 + }, + { + "loss": 0.0, + "grad_norm": 0.0011874843621626496, + "learning_rate": 2.65e-07, + "num_tokens": 1003436.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.520399332046509e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.738, + "step": 1476 + }, + { + "loss": 0.0, + "grad_norm": 0.0074364058673381805, + "learning_rate": 2.645e-07, + "num_tokens": 1004332.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 0.00015626568347215652, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7385, + "step": 1477 + }, + { + "loss": 0.0, + "grad_norm": 0.6913915276527405, + "learning_rate": 2.64e-07, + "num_tokens": 1005228.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.3711472749710083e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.739, + "step": 1478 + }, + { + "loss": 0.0, + "grad_norm": 0.7458115816116333, + "learning_rate": 2.635e-07, + "num_tokens": 1006124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5744999647140503, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5744999647140503, + "reward_std": 0.27082186937332153, + "kl": 4.4743530452251434e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7395, + "step": 1479 + }, + { + "loss": 0.0, + "grad_norm": 0.9545727968215942, + "learning_rate": 2.63e-07, + "num_tokens": 1007020.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 5.8341771364212036e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.74, + "step": 1480 + }, + { + "loss": 0.0, + "grad_norm": 0.0005918386159464717, + "learning_rate": 2.625e-07, + "num_tokens": 1007386.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.8104521334171295e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7405, + "step": 1481 + }, + { + "loss": 0.0, + "grad_norm": 0.0007409105310216546, + "learning_rate": 2.62e-07, + "num_tokens": 1008282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 3.8562342524528503e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.741, + "step": 1482 + }, + { + "loss": 0.0, + "grad_norm": 0.0022666389122605324, + "learning_rate": 2.615e-07, + "num_tokens": 1009178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8560000061988831, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8560000061988831, + "reward_std": 0.0, + "kl": 5.13000413775444e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7415, + "step": 1483 + }, + { + "loss": 0.0, + "grad_norm": 0.0009365888545289636, + "learning_rate": 2.61e-07, + "num_tokens": 1009544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.640167415142059e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.742, + "step": 1484 + }, + { + "loss": 0.0, + "grad_norm": 0.0014286866644397378, + "learning_rate": 2.605e-07, + "num_tokens": 1009910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.191882908344269e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7425, + "step": 1485 + }, + { + "loss": 0.0, + "grad_norm": 0.000844051013700664, + "learning_rate": 2.6e-07, + "num_tokens": 1010276.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.2312084436416626e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.743, + "step": 1486 + }, + { + "loss": 0.0, + "grad_norm": 0.8638677000999451, + "learning_rate": 2.595e-07, + "num_tokens": 1011172.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 5.143415182828903e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7435, + "step": 1487 + }, + { + "loss": 0.0, + "grad_norm": 0.019279703497886658, + "learning_rate": 2.59e-07, + "num_tokens": 1012068.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 0.00023065321147441864, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.744, + "step": 1488 + }, + { + "loss": 0.0, + "grad_norm": 0.0011295841541141272, + "learning_rate": 2.585e-07, + "num_tokens": 1012434.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.8337504267692566e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7445, + "step": 1489 + }, + { + "loss": 0.0, + "grad_norm": 0.0028237486258149147, + "learning_rate": 2.58e-07, + "num_tokens": 1012800.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.197750240564346e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.745, + "step": 1490 + }, + { + "loss": 0.0, + "grad_norm": 0.7583287358283997, + "learning_rate": 2.5749999999999997e-07, + "num_tokens": 1013696.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 6.28037378191948e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7455, + "step": 1491 + }, + { + "loss": 0.0, + "grad_norm": 0.9933559894561768, + "learning_rate": 2.57e-07, + "num_tokens": 1014592.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.01909189112484455, + "reward": 0.8365000486373901, + "reward_std": 0.01909189112484455, + "kl": 8.109863847494125e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.746, + "step": 1492 + }, + { + "loss": 0.0, + "grad_norm": 1.006516456604004, + "learning_rate": 2.565e-07, + "num_tokens": 1015488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 8.907169103622437e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7465, + "step": 1493 + }, + { + "loss": 0.0, + "grad_norm": 0.0009460377041250467, + "learning_rate": 2.56e-07, + "num_tokens": 1015854.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.212092608213425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.747, + "step": 1494 + }, + { + "loss": 0.0, + "grad_norm": 0.029313264414668083, + "learning_rate": 2.555e-07, + "num_tokens": 1016750.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00027726683765649796, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7475, + "step": 1495 + }, + { + "loss": 0.0, + "grad_norm": 0.48710012435913086, + "learning_rate": 2.55e-07, + "num_tokens": 1017646.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.0809471607208252e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.748, + "step": 1496 + }, + { + "loss": 0.0, + "grad_norm": 0.6663738489151001, + "learning_rate": 2.545e-07, + "num_tokens": 1018542.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 5.486141890287399e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7485, + "step": 1497 + }, + { + "loss": 0.0, + "grad_norm": 0.0006897600833326578, + "learning_rate": 2.5399999999999997e-07, + "num_tokens": 1018908.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.488214522600174e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.749, + "step": 1498 + }, + { + "loss": 0.0, + "grad_norm": 0.0011770074488595128, + "learning_rate": 2.535e-07, + "num_tokens": 1019804.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.8412010073661804e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7495, + "step": 1499 + }, + { + "loss": 0.0, + "grad_norm": 0.0006154448492452502, + "learning_rate": 2.53e-07, + "num_tokens": 1020700.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.367103636264801e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.75, + "step": 1500 + }, + { + "loss": 0.0, + "grad_norm": 0.0016679060645401478, + "learning_rate": 2.5249999999999996e-07, + "num_tokens": 1021066.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.5816955864429474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7505, + "step": 1501 + }, + { + "loss": 0.0, + "grad_norm": 0.541278064250946, + "learning_rate": 2.52e-07, + "num_tokens": 1021962.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8285000324249268, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8285000324249268, + "reward_std": 0.0007070977007970214, + "kl": 0.00013221707195043564, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.751, + "step": 1502 + }, + { + "loss": 0.0, + "grad_norm": 0.0014445210108533502, + "learning_rate": 2.515e-07, + "num_tokens": 1022328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.9596110582351685e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7515, + "step": 1503 + }, + { + "loss": 0.0, + "grad_norm": 0.7894119620323181, + "learning_rate": 2.51e-07, + "num_tokens": 1023224.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.7989579141139984e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.752, + "step": 1504 + }, + { + "loss": 0.0, + "grad_norm": 0.0007809365633875132, + "learning_rate": 2.5049999999999997e-07, + "num_tokens": 1023590.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 7.900409400463104e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7525, + "step": 1505 + }, + { + "loss": 0.0, + "grad_norm": 0.001254385570064187, + "learning_rate": 2.5e-07, + "num_tokens": 1023956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.07220795750618e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.753, + "step": 1506 + }, + { + "loss": 0.0, + "grad_norm": 0.0020893942564725876, + "learning_rate": 2.495e-07, + "num_tokens": 1024852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8360000252723694, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8360000252723694, + "reward_std": 0.0, + "kl": 0.00010944623500108719, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7535, + "step": 1507 + }, + { + "loss": 0.0, + "grad_norm": 0.0008904547430574894, + "learning_rate": 2.4899999999999997e-07, + "num_tokens": 1025748.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 3.521237522363663e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.754, + "step": 1508 + }, + { + "loss": 0.0, + "grad_norm": 1.0072859525680542, + "learning_rate": 2.485e-07, + "num_tokens": 1026644.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 1.9727274775505066e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7545, + "step": 1509 + }, + { + "loss": 0.0, + "grad_norm": 0.005649761762470007, + "learning_rate": 2.48e-07, + "num_tokens": 1027540.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00011086929589509964, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.755, + "step": 1510 + }, + { + "loss": 0.0, + "grad_norm": 0.9958588480949402, + "learning_rate": 2.475e-07, + "num_tokens": 1028436.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.7653371691703796e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7555, + "step": 1511 + }, + { + "loss": 0.0, + "grad_norm": 1.2141926288604736, + "learning_rate": 2.47e-07, + "num_tokens": 1029332.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.609499990940094, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.609499990940094, + "reward_std": 0.32031938433647156, + "kl": 8.317455649375916e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.756, + "step": 1512 + }, + { + "loss": 0.0, + "grad_norm": 0.0011213469551876187, + "learning_rate": 2.465e-07, + "num_tokens": 1029698.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.226900637149811e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7565, + "step": 1513 + }, + { + "loss": 0.0, + "grad_norm": 0.7629797458648682, + "learning_rate": 2.46e-07, + "num_tokens": 1030594.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.6388093829154968e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.757, + "step": 1514 + }, + { + "loss": 0.0, + "grad_norm": 0.5527917742729187, + "learning_rate": 2.4549999999999997e-07, + "num_tokens": 1031490.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 4.778243601322174e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7575, + "step": 1515 + }, + { + "loss": 0.0, + "grad_norm": 0.6782432794570923, + "learning_rate": 2.45e-07, + "num_tokens": 1032386.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.030405579134821892, + "reward": 0.8355000019073486, + "reward_std": 0.030405579134821892, + "kl": 5.2094459533691406e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.758, + "step": 1516 + }, + { + "loss": 0.0, + "grad_norm": 0.0038548826705664396, + "learning_rate": 2.445e-07, + "num_tokens": 1033282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.38199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.38199999928474426, + "reward_std": 0.0, + "kl": 7.656030356884003e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7585, + "step": 1517 + }, + { + "loss": 0.0, + "grad_norm": 0.0009280137601308525, + "learning_rate": 2.4399999999999996e-07, + "num_tokens": 1033648.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.349354326725006e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.759, + "step": 1518 + }, + { + "loss": 0.0, + "grad_norm": 0.0006928169168531895, + "learning_rate": 2.435e-07, + "num_tokens": 1034544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 3.481842577457428e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7595, + "step": 1519 + }, + { + "loss": 0.0, + "grad_norm": 0.0008756217430345714, + "learning_rate": 2.43e-07, + "num_tokens": 1034910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8233975172042847e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.76, + "step": 1520 + }, + { + "loss": 0.0, + "grad_norm": 0.0006150489789433777, + "learning_rate": 2.425e-07, + "num_tokens": 1035806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 4.21423465013504e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7605, + "step": 1521 + }, + { + "loss": 0.0, + "grad_norm": 0.9960310459136963, + "learning_rate": 2.4199999999999997e-07, + "num_tokens": 1036702.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 0.00010388623923063278, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.761, + "step": 1522 + }, + { + "loss": 0.0, + "grad_norm": 0.7770252823829651, + "learning_rate": 2.415e-07, + "num_tokens": 1037598.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.565500020980835, + "rewards/environment_reward_verifier/std": 0.2637507915496826, + "reward": 0.565500020980835, + "reward_std": 0.2637507915496826, + "kl": 5.447492003440857e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7615, + "step": 1523 + }, + { + "loss": 0.0, + "grad_norm": 0.8710464239120483, + "learning_rate": 2.41e-07, + "num_tokens": 1038494.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8454999923706055, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8454999923706055, + "reward_std": 0.014849262312054634, + "kl": 3.6337412893772125e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.762, + "step": 1524 + }, + { + "loss": 0.0, + "grad_norm": 0.0007435260922648013, + "learning_rate": 2.4049999999999996e-07, + "num_tokens": 1038860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6765279471874237e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7625, + "step": 1525 + }, + { + "loss": 0.0, + "grad_norm": 0.7789291739463806, + "learning_rate": 2.4e-07, + "num_tokens": 1039756.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.844313323497772e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.763, + "step": 1526 + }, + { + "loss": 0.0, + "grad_norm": 0.866211473941803, + "learning_rate": 2.395e-07, + "num_tokens": 1040652.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 7.869582623243332e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7635, + "step": 1527 + }, + { + "loss": 0.0, + "grad_norm": 0.0014106653397902846, + "learning_rate": 2.3899999999999996e-07, + "num_tokens": 1041548.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8500000238418579, + "reward_std": 0.0, + "kl": 4.794169217348099e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.764, + "step": 1528 + }, + { + "loss": 0.0, + "grad_norm": 0.925835907459259, + "learning_rate": 2.3849999999999997e-07, + "num_tokens": 1042444.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7999999523162842, + "rewards/environment_reward_verifier/std": 0.04949747025966644, + "reward": 0.7999999523162842, + "reward_std": 0.04949747025966644, + "kl": 5.22807240486145e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7645, + "step": 1529 + }, + { + "loss": 0.0, + "grad_norm": 0.0028158905915915966, + "learning_rate": 2.38e-07, + "num_tokens": 1042810.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.856505155563354e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.765, + "step": 1530 + }, + { + "loss": 0.0, + "grad_norm": 0.6579874753952026, + "learning_rate": 2.3749999999999998e-07, + "num_tokens": 1043706.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8004999756813049, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.8004999756813049, + "reward_std": 0.04879037290811539, + "kl": 4.453584551811218e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7655, + "step": 1531 + }, + { + "loss": 0.0, + "grad_norm": 0.0006663826643489301, + "learning_rate": 2.3699999999999996e-07, + "num_tokens": 1044072.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5161541998386383e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.766, + "step": 1532 + }, + { + "loss": 0.0, + "grad_norm": 0.0009142456110566854, + "learning_rate": 2.3649999999999998e-07, + "num_tokens": 1044438.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.51443886756897e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7665, + "step": 1533 + }, + { + "loss": 0.0, + "grad_norm": 0.0010897335596382618, + "learning_rate": 2.3599999999999997e-07, + "num_tokens": 1044804.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.8941390812397e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.767, + "step": 1534 + }, + { + "loss": 0.0, + "grad_norm": 0.9638667106628418, + "learning_rate": 2.3549999999999998e-07, + "num_tokens": 1045700.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 9.973067790269852e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7675, + "step": 1535 + }, + { + "loss": 0.0001, + "grad_norm": 0.1486448496580124, + "learning_rate": 2.3499999999999997e-07, + "num_tokens": 1046596.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 0.0019078860059380531, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.768, + "step": 1536 + }, + { + "loss": 0.0, + "grad_norm": 0.0011578103294596076, + "learning_rate": 2.3449999999999996e-07, + "num_tokens": 1046962.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.5416876673698425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7685, + "step": 1537 + }, + { + "loss": 0.0, + "grad_norm": 0.000997197232209146, + "learning_rate": 2.34e-07, + "num_tokens": 1047328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3618882298469543e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.769, + "step": 1538 + }, + { + "loss": 0.0, + "grad_norm": 0.001980582484975457, + "learning_rate": 2.335e-07, + "num_tokens": 1048224.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8360000252723694, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8360000252723694, + "reward_std": 0.0, + "kl": 5.5631622672080994e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7695, + "step": 1539 + }, + { + "loss": 0.0, + "grad_norm": 0.7257095575332642, + "learning_rate": 2.33e-07, + "num_tokens": 1049120.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 3.772880882024765e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.77, + "step": 1540 + }, + { + "loss": 0.0, + "grad_norm": 0.0010103528620675206, + "learning_rate": 2.325e-07, + "num_tokens": 1049486.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.966689109802246e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7705, + "step": 1541 + }, + { + "loss": 0.0, + "grad_norm": 0.7430920004844666, + "learning_rate": 2.32e-07, + "num_tokens": 1050382.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 4.794076085090637e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.771, + "step": 1542 + }, + { + "loss": 0.0, + "grad_norm": 0.0009718029759824276, + "learning_rate": 2.315e-07, + "num_tokens": 1051278.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.123484879732132e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7715, + "step": 1543 + }, + { + "loss": -0.0, + "grad_norm": 0.5792695879936218, + "learning_rate": 2.31e-07, + "num_tokens": 1052174.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8209999799728394, + "rewards/environment_reward_verifier/std": 0.0014142375439405441, + "reward": 0.8209999799728394, + "reward_std": 0.0014142375439405441, + "kl": 5.393102765083313e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.772, + "step": 1544 + }, + { + "loss": 0.0, + "grad_norm": 1.2712446451187134, + "learning_rate": 2.305e-07, + "num_tokens": 1053070.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 6.802938878536224e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7725, + "step": 1545 + }, + { + "loss": 0.0, + "grad_norm": 0.6029819250106812, + "learning_rate": 2.3e-07, + "num_tokens": 1053966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 4.980899393558502e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.773, + "step": 1546 + }, + { + "loss": 0.0, + "grad_norm": 0.7989152073860168, + "learning_rate": 2.295e-07, + "num_tokens": 1054862.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843500018119812, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.843500018119812, + "reward_std": 0.016263457015156746, + "kl": 6.110034883022308e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7735, + "step": 1547 + }, + { + "loss": 0.0, + "grad_norm": 0.0020734556019306183, + "learning_rate": 2.29e-07, + "num_tokens": 1055228.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.111882299184799e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.774, + "step": 1548 + }, + { + "loss": 0.0, + "grad_norm": 1.1049245595932007, + "learning_rate": 2.285e-07, + "num_tokens": 1056124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.815500020980835, + "rewards/environment_reward_verifier/std": 0.012020829133689404, + "reward": 0.815500020980835, + "reward_std": 0.012020829133689404, + "kl": 0.00013441313058137894, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7745, + "step": 1549 + }, + { + "loss": 0.0, + "grad_norm": 0.004347025416791439, + "learning_rate": 2.28e-07, + "num_tokens": 1057020.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8560000061988831, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8560000061988831, + "reward_std": 0.0, + "kl": 3.883149474859238e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.775, + "step": 1550 + }, + { + "loss": 0.0, + "grad_norm": 0.0030298628844320774, + "learning_rate": 2.275e-07, + "num_tokens": 1057386.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.721703827381134e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7755, + "step": 1551 + }, + { + "loss": 0.0, + "grad_norm": 0.0004023867077194154, + "learning_rate": 2.27e-07, + "num_tokens": 1058282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 1.8894672393798828e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.776, + "step": 1552 + }, + { + "loss": 0.0, + "grad_norm": 0.0006335912039503455, + "learning_rate": 2.265e-07, + "num_tokens": 1058648.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3688189685344696e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7765, + "step": 1553 + }, + { + "loss": 0.0, + "grad_norm": 0.8788871169090271, + "learning_rate": 2.2599999999999999e-07, + "num_tokens": 1059544.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 8.051283657550812e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.777, + "step": 1554 + }, + { + "loss": 0.0, + "grad_norm": 0.0010447928216308355, + "learning_rate": 2.255e-07, + "num_tokens": 1059910.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.571396857500076e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7775, + "step": 1555 + }, + { + "loss": 0.0, + "grad_norm": 0.9580017924308777, + "learning_rate": 2.25e-07, + "num_tokens": 1060806.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 5.83576038479805e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.778, + "step": 1556 + }, + { + "loss": 0.0, + "grad_norm": 0.000741632713470608, + "learning_rate": 2.245e-07, + "num_tokens": 1061172.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6345252990722656e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7785, + "step": 1557 + }, + { + "loss": 0.0, + "grad_norm": 0.7395283579826355, + "learning_rate": 2.24e-07, + "num_tokens": 1062068.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 4.054047167301178e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.779, + "step": 1558 + }, + { + "loss": 0.0, + "grad_norm": 0.001459570717997849, + "learning_rate": 2.2349999999999998e-07, + "num_tokens": 1062434.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.2844563722610474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7795, + "step": 1559 + }, + { + "loss": 0.0, + "grad_norm": 0.0007419899338856339, + "learning_rate": 2.23e-07, + "num_tokens": 1063330.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.408787935972214e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.78, + "step": 1560 + }, + { + "loss": 0.0, + "grad_norm": 0.872297465801239, + "learning_rate": 2.225e-07, + "num_tokens": 1064226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6190000176429749, + "rewards/environment_reward_verifier/std": 0.33516862988471985, + "reward": 0.6190000176429749, + "reward_std": 0.33516862988471985, + "kl": 8.444022387266159e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7805, + "step": 1561 + }, + { + "loss": 0.0, + "grad_norm": 0.0013025372754782438, + "learning_rate": 2.22e-07, + "num_tokens": 1065122.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7879999876022339, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7879999876022339, + "reward_std": 0.0, + "kl": 4.2776577174663544e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.781, + "step": 1562 + }, + { + "loss": 0.0, + "grad_norm": 0.7462071180343628, + "learning_rate": 2.215e-07, + "num_tokens": 1066018.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 5.4595060646533966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7815, + "step": 1563 + }, + { + "loss": 0.0, + "grad_norm": 0.002291295910254121, + "learning_rate": 2.2099999999999998e-07, + "num_tokens": 1066384.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.212666630744934e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.782, + "step": 1564 + }, + { + "loss": -0.0, + "grad_norm": 1.4264631271362305, + "learning_rate": 2.205e-07, + "num_tokens": 1067280.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8344999551773071, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8344999551773071, + "reward_std": 0.0007070977007970214, + "kl": 4.3925829231739044e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7825, + "step": 1565 + }, + { + "loss": 0.0, + "grad_norm": 0.0015623174840584397, + "learning_rate": 2.1999999999999998e-07, + "num_tokens": 1067646.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.921426832675934e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.783, + "step": 1566 + }, + { + "loss": 0.0, + "grad_norm": 0.0029900292865931988, + "learning_rate": 2.195e-07, + "num_tokens": 1068012.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.1206756830215454e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7835, + "step": 1567 + }, + { + "loss": 0.0, + "grad_norm": 0.0052716792561113834, + "learning_rate": 2.19e-07, + "num_tokens": 1068378.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00011092331260442734, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.784, + "step": 1568 + }, + { + "loss": 0.0, + "grad_norm": 0.6562672853469849, + "learning_rate": 2.1849999999999998e-07, + "num_tokens": 1069274.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.1152740120887756e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7845, + "step": 1569 + }, + { + "loss": 0.0, + "grad_norm": 0.9454992413520813, + "learning_rate": 2.18e-07, + "num_tokens": 1070170.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.633702337741852e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.785, + "step": 1570 + }, + { + "loss": 0.0, + "grad_norm": 0.0009240294457413256, + "learning_rate": 2.1749999999999998e-07, + "num_tokens": 1071066.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 5.4377131164073944e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7855, + "step": 1571 + }, + { + "loss": 0.0, + "grad_norm": 0.0005841344245709479, + "learning_rate": 2.17e-07, + "num_tokens": 1071432.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6757828891277313e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.786, + "step": 1572 + }, + { + "loss": 0.0, + "grad_norm": 0.5484344959259033, + "learning_rate": 2.1649999999999999e-07, + "num_tokens": 1072328.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 4.1765160858631134e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7865, + "step": 1573 + }, + { + "loss": 0.0, + "grad_norm": 0.0011522466083988547, + "learning_rate": 2.1599999999999998e-07, + "num_tokens": 1072694.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.2556395530700684e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.787, + "step": 1574 + }, + { + "loss": 0.0, + "grad_norm": 0.0010642482666298747, + "learning_rate": 2.155e-07, + "num_tokens": 1073060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.194250166416168e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7875, + "step": 1575 + }, + { + "loss": 0.0, + "grad_norm": 0.0004986397107131779, + "learning_rate": 2.1499999999999998e-07, + "num_tokens": 1073956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 1.7260201275348663e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.788, + "step": 1576 + }, + { + "loss": 0.0, + "grad_norm": 0.010080178268253803, + "learning_rate": 2.145e-07, + "num_tokens": 1074852.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.0001212460920214653, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7885, + "step": 1577 + }, + { + "loss": 0.0, + "grad_norm": 0.8077563047409058, + "learning_rate": 2.1399999999999998e-07, + "num_tokens": 1075748.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 0.00012228917330503464, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.789, + "step": 1578 + }, + { + "loss": 0.0, + "grad_norm": 0.001300574280321598, + "learning_rate": 2.1349999999999997e-07, + "num_tokens": 1076114.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.275088965892792e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7895, + "step": 1579 + }, + { + "loss": 0.0, + "grad_norm": 0.0015755236381664872, + "learning_rate": 2.13e-07, + "num_tokens": 1076480.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.367103636264801e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.79, + "step": 1580 + }, + { + "loss": 0.0, + "grad_norm": 0.0020857423078268766, + "learning_rate": 2.1249999999999998e-07, + "num_tokens": 1076846.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3896416425704956e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7905, + "step": 1581 + }, + { + "loss": 0.0, + "grad_norm": 0.5299270153045654, + "learning_rate": 2.12e-07, + "num_tokens": 1077742.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.08909548819065094, + "reward": 0.8149999976158142, + "reward_std": 0.08909548819065094, + "kl": 2.506934106349945e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.791, + "step": 1582 + }, + { + "loss": 0.0, + "grad_norm": 0.0011763119837269187, + "learning_rate": 2.1149999999999998e-07, + "num_tokens": 1078108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.5983120799064636e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7915, + "step": 1583 + }, + { + "loss": 0.0, + "grad_norm": 0.001765949185937643, + "learning_rate": 2.1099999999999997e-07, + "num_tokens": 1079004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.558839231729507e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.792, + "step": 1584 + }, + { + "loss": 0.0, + "grad_norm": 0.000826952513307333, + "learning_rate": 2.1049999999999999e-07, + "num_tokens": 1079370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.525467425584793e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7925, + "step": 1585 + }, + { + "loss": 0.0, + "grad_norm": 0.0004427609674166888, + "learning_rate": 2.0999999999999997e-07, + "num_tokens": 1079736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.3214536011219025e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.793, + "step": 1586 + }, + { + "loss": 0.0, + "grad_norm": 0.0011962472926825285, + "learning_rate": 2.095e-07, + "num_tokens": 1080102.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.326591104269028e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7935, + "step": 1587 + }, + { + "loss": 0.0, + "grad_norm": 0.0016075981548056006, + "learning_rate": 2.0899999999999998e-07, + "num_tokens": 1080468.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9566697776317596e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.794, + "step": 1588 + }, + { + "loss": 0.0, + "grad_norm": 0.9348431825637817, + "learning_rate": 2.085e-07, + "num_tokens": 1081364.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 0.00014391914010047913, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7945, + "step": 1589 + }, + { + "loss": 0.0001, + "grad_norm": 6.403285026550293, + "learning_rate": 2.0799999999999998e-07, + "num_tokens": 1082260.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 0.001313304528594017, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.795, + "step": 1590 + }, + { + "loss": 0.0, + "grad_norm": 1.2276204824447632, + "learning_rate": 2.0749999999999997e-07, + "num_tokens": 1083156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8374999761581421, + "rewards/environment_reward_verifier/std": 0.026162952184677124, + "reward": 0.8374999761581421, + "reward_std": 0.026162952184677124, + "kl": 8.566584438085556e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7955, + "step": 1591 + }, + { + "loss": 0.0, + "grad_norm": 0.7293785810470581, + "learning_rate": 2.07e-07, + "num_tokens": 1084052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 6.05238601565361e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.796, + "step": 1592 + }, + { + "loss": 0.0, + "grad_norm": 0.0007735049584880471, + "learning_rate": 2.0649999999999998e-07, + "num_tokens": 1084418.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.9413960874080658e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7965, + "step": 1593 + }, + { + "loss": 0.0, + "grad_norm": 0.0005749748088419437, + "learning_rate": 2.06e-07, + "num_tokens": 1084784.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9215978682041168e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.797, + "step": 1594 + }, + { + "loss": 0.0, + "grad_norm": 1.0623031854629517, + "learning_rate": 2.0549999999999998e-07, + "num_tokens": 1085680.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 5.367118865251541e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7975, + "step": 1595 + }, + { + "loss": 0.0, + "grad_norm": 0.7510759234428406, + "learning_rate": 2.0499999999999997e-07, + "num_tokens": 1086576.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 5.256757140159607e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.798, + "step": 1596 + }, + { + "loss": 0.0, + "grad_norm": 0.7434391975402832, + "learning_rate": 2.0449999999999998e-07, + "num_tokens": 1087472.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31183406710624695, + "reward": 0.5995000004768372, + "reward_std": 0.31183406710624695, + "kl": 5.564093589782715e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7985, + "step": 1597 + }, + { + "loss": 0.0, + "grad_norm": 0.0007738731219433248, + "learning_rate": 2.0399999999999997e-07, + "num_tokens": 1088368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7960000038146973, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7960000038146973, + "reward_std": 0.0, + "kl": 4.332512617111206e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.799, + "step": 1598 + }, + { + "loss": 0.0, + "grad_norm": 1.5968071222305298, + "learning_rate": 2.035e-07, + "num_tokens": 1089264.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00015922915190458298, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.7995, + "step": 1599 + }, + { + "loss": 0.0, + "grad_norm": 0.0011912197805941105, + "learning_rate": 2.03e-07, + "num_tokens": 1090160.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 5.2143819630146027e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8, + "step": 1600 + }, + { + "loss": 0.0, + "grad_norm": 0.0012906340416520834, + "learning_rate": 2.025e-07, + "num_tokens": 1091056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.3326599299907684e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8005, + "step": 1601 + }, + { + "loss": 0.0, + "grad_norm": 0.0013231480261310935, + "learning_rate": 2.02e-07, + "num_tokens": 1091422.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.551706999540329e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.801, + "step": 1602 + }, + { + "loss": 0.0, + "grad_norm": 0.00767257995903492, + "learning_rate": 2.015e-07, + "num_tokens": 1091788.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00010890420526266098, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8015, + "step": 1603 + }, + { + "loss": 0.0, + "grad_norm": 0.0014246352948248386, + "learning_rate": 2.01e-07, + "num_tokens": 1092684.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.878000020980835, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.878000020980835, + "reward_std": 0.0, + "kl": 4.823412746191025e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.802, + "step": 1604 + }, + { + "loss": 0.0, + "grad_norm": 0.005558141507208347, + "learning_rate": 2.005e-07, + "num_tokens": 1093050.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.20640304684639e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8025, + "step": 1605 + }, + { + "loss": 0.0, + "grad_norm": 0.835629403591156, + "learning_rate": 2e-07, + "num_tokens": 1093946.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 6.555672734975815e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.803, + "step": 1606 + }, + { + "loss": 0.0, + "grad_norm": 1.010273814201355, + "learning_rate": 1.995e-07, + "num_tokens": 1094842.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 8.833687752485275e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8035, + "step": 1607 + }, + { + "loss": 0.0, + "grad_norm": 0.0005389400757849216, + "learning_rate": 1.99e-07, + "num_tokens": 1095738.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 3.917329013347626e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.804, + "step": 1608 + }, + { + "loss": 0.0, + "grad_norm": 0.001107304240576923, + "learning_rate": 1.985e-07, + "num_tokens": 1096634.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.467833787202835e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8045, + "step": 1609 + }, + { + "loss": 0.0, + "grad_norm": 0.6192328929901123, + "learning_rate": 1.98e-07, + "num_tokens": 1097530.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 2.8448179364204407e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.805, + "step": 1610 + }, + { + "loss": 0.0, + "grad_norm": 0.0010528776329010725, + "learning_rate": 1.975e-07, + "num_tokens": 1097896.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.906952381134033e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8055, + "step": 1611 + }, + { + "loss": 0.0, + "grad_norm": 0.8730188012123108, + "learning_rate": 1.97e-07, + "num_tokens": 1098792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5659999847412109, + "rewards/environment_reward_verifier/std": 0.26304370164871216, + "reward": 0.5659999847412109, + "reward_std": 0.26304370164871216, + "kl": 8.165556937456131e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.806, + "step": 1612 + }, + { + "loss": 0.0, + "grad_norm": 0.003221945371478796, + "learning_rate": 1.965e-07, + "num_tokens": 1099158.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.885811358690262e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8065, + "step": 1613 + }, + { + "loss": 0.0, + "grad_norm": 0.002188287442550063, + "learning_rate": 1.96e-07, + "num_tokens": 1099524.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.95066186785698e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.807, + "step": 1614 + }, + { + "loss": 0.0, + "grad_norm": 0.0005099984700791538, + "learning_rate": 1.955e-07, + "num_tokens": 1100420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 2.9620714485645294e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8075, + "step": 1615 + }, + { + "loss": 0.0, + "grad_norm": 0.0010692180367186666, + "learning_rate": 1.9499999999999999e-07, + "num_tokens": 1100786.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.5768222510814667e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.808, + "step": 1616 + }, + { + "loss": 0.0, + "grad_norm": 0.000704990467056632, + "learning_rate": 1.945e-07, + "num_tokens": 1101682.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7565285563468933e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8085, + "step": 1617 + }, + { + "loss": 0.0, + "grad_norm": 0.0007767347269691527, + "learning_rate": 1.94e-07, + "num_tokens": 1102048.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.1250139474868774e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.809, + "step": 1618 + }, + { + "loss": 0.0, + "grad_norm": 0.7776121497154236, + "learning_rate": 1.935e-07, + "num_tokens": 1102944.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5975000262260437, + "rewards/environment_reward_verifier/std": 0.3047630488872528, + "reward": 0.5975000262260437, + "reward_std": 0.3047630488872528, + "kl": 5.421321839094162e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8095, + "step": 1619 + }, + { + "loss": 0.0, + "grad_norm": 0.014690214768052101, + "learning_rate": 1.93e-07, + "num_tokens": 1103310.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00018547195941209793, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.81, + "step": 1620 + }, + { + "loss": 0.0, + "grad_norm": 1.0280709266662598, + "learning_rate": 1.9249999999999998e-07, + "num_tokens": 1104206.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7904999852180481, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7904999852180481, + "reward_std": 0.037476640194654465, + "kl": 6.31827861070633e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8105, + "step": 1621 + }, + { + "loss": 0.0, + "grad_norm": 1.1227260828018188, + "learning_rate": 1.92e-07, + "num_tokens": 1105102.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8250000476837158, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8250000476837158, + "reward_std": 0.01555635966360569, + "kl": 3.284774720668793e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.811, + "step": 1622 + }, + { + "loss": 0.0, + "grad_norm": 0.0007454422884620726, + "learning_rate": 1.915e-07, + "num_tokens": 1105468.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.224611282348633e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8115, + "step": 1623 + }, + { + "loss": 0.0, + "grad_norm": 0.003449360141530633, + "learning_rate": 1.91e-07, + "num_tokens": 1105834.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.674812614917755e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.812, + "step": 1624 + }, + { + "loss": 0.0, + "grad_norm": 0.00368543085642159, + "learning_rate": 1.905e-07, + "num_tokens": 1106730.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8429999947547913, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8429999947547913, + "reward_std": 0.0, + "kl": 7.947441190481186e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8125, + "step": 1625 + }, + { + "loss": 0.0, + "grad_norm": 0.6739558577537537, + "learning_rate": 1.8999999999999998e-07, + "num_tokens": 1107626.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843999981880188, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.843999981880188, + "reward_std": 0.01555635966360569, + "kl": 4.8667192459106445e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.813, + "step": 1626 + }, + { + "loss": 0.0, + "grad_norm": 0.0015609045512974262, + "learning_rate": 1.895e-07, + "num_tokens": 1107992.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3981166779994965e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8135, + "step": 1627 + }, + { + "loss": 0.0, + "grad_norm": 0.0005068195168860257, + "learning_rate": 1.8899999999999999e-07, + "num_tokens": 1108358.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.7039477825164795e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.814, + "step": 1628 + }, + { + "loss": 0.0, + "grad_norm": 0.0008186335908249021, + "learning_rate": 1.885e-07, + "num_tokens": 1108724.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.4374184906482697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8145, + "step": 1629 + }, + { + "loss": 0.0, + "grad_norm": 0.000544139591511339, + "learning_rate": 1.88e-07, + "num_tokens": 1109090.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.124680370092392e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.815, + "step": 1630 + }, + { + "loss": 0.0, + "grad_norm": 0.0011354797752574086, + "learning_rate": 1.875e-07, + "num_tokens": 1109456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.385636955499649e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8155, + "step": 1631 + }, + { + "loss": 0.0, + "grad_norm": 1.1252527236938477, + "learning_rate": 1.87e-07, + "num_tokens": 1110352.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 0.00012831855565309525, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.816, + "step": 1632 + }, + { + "loss": 0.0, + "grad_norm": 0.8676841855049133, + "learning_rate": 1.8649999999999998e-07, + "num_tokens": 1111248.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8180000185966492, + "rewards/environment_reward_verifier/std": 0.007071061059832573, + "reward": 0.8180000185966492, + "reward_std": 0.007071061059832573, + "kl": 8.204672485589981e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8165, + "step": 1633 + }, + { + "loss": 0.0, + "grad_norm": 0.0011640795273706317, + "learning_rate": 1.86e-07, + "num_tokens": 1111614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.4091994166374207e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.817, + "step": 1634 + }, + { + "loss": 0.0, + "grad_norm": 0.0010903201764449477, + "learning_rate": 1.855e-07, + "num_tokens": 1111980.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0804814994335175e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8175, + "step": 1635 + }, + { + "loss": 0.0, + "grad_norm": 1.5268325805664062, + "learning_rate": 1.85e-07, + "num_tokens": 1112876.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7870000004768372, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.7870000004768372, + "reward_std": 0.049497511237859726, + "kl": 0.00013242289423942566, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.818, + "step": 1636 + }, + { + "loss": 0.0, + "grad_norm": 0.005956660490483046, + "learning_rate": 1.845e-07, + "num_tokens": 1113242.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 8.577574044466019e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8185, + "step": 1637 + }, + { + "loss": 0.0, + "grad_norm": 0.7777119874954224, + "learning_rate": 1.8399999999999998e-07, + "num_tokens": 1114138.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 3.387313336133957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.819, + "step": 1638 + }, + { + "loss": 0.0, + "grad_norm": 0.0005967547767795622, + "learning_rate": 1.835e-07, + "num_tokens": 1115034.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 3.451574593782425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8195, + "step": 1639 + }, + { + "loss": 0.0, + "grad_norm": 0.9599042534828186, + "learning_rate": 1.8299999999999998e-07, + "num_tokens": 1115930.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843500018119812, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.843500018119812, + "reward_std": 0.016263457015156746, + "kl": 4.692375659942627e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.82, + "step": 1640 + }, + { + "loss": 0.0, + "grad_norm": 3.7044155597686768, + "learning_rate": 1.825e-07, + "num_tokens": 1116826.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.824999988079071, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.824999988079071, + "reward_std": 0.011313731782138348, + "kl": 0.00022888649255037308, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8205, + "step": 1641 + }, + { + "loss": 0.0, + "grad_norm": 0.786083996295929, + "learning_rate": 1.82e-07, + "num_tokens": 1117722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8400000333786011, + "rewards/environment_reward_verifier/std": 0.014142164029181004, + "reward": 0.8400000333786011, + "reward_std": 0.014142164029181004, + "kl": 0.00013180077075958252, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.821, + "step": 1642 + }, + { + "loss": 0.0, + "grad_norm": 0.0021554480772465467, + "learning_rate": 1.8149999999999998e-07, + "num_tokens": 1118618.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 6.999168545007706e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8215, + "step": 1643 + }, + { + "loss": 0.0, + "grad_norm": 0.0006479070289060473, + "learning_rate": 1.81e-07, + "num_tokens": 1119514.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.351084887981415e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.822, + "step": 1644 + }, + { + "loss": 0.0, + "grad_norm": 0.0003548029053490609, + "learning_rate": 1.8049999999999998e-07, + "num_tokens": 1120410.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 2.230145037174225e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8225, + "step": 1645 + }, + { + "loss": 0.0, + "grad_norm": 0.004329314921051264, + "learning_rate": 1.8e-07, + "num_tokens": 1121306.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 6.543286144733429e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.823, + "step": 1646 + }, + { + "loss": 0.0, + "grad_norm": 0.0009270249865949154, + "learning_rate": 1.7949999999999999e-07, + "num_tokens": 1121672.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.204828292131424e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8235, + "step": 1647 + }, + { + "loss": 0.0, + "grad_norm": 1.0634018182754517, + "learning_rate": 1.7899999999999997e-07, + "num_tokens": 1122568.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 8.80332663655281e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.824, + "step": 1648 + }, + { + "loss": 0.0, + "grad_norm": 0.0007692989311181009, + "learning_rate": 1.785e-07, + "num_tokens": 1122934.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0349398255348206e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8245, + "step": 1649 + }, + { + "loss": 0.0, + "grad_norm": 0.007314886432141066, + "learning_rate": 1.7799999999999998e-07, + "num_tokens": 1123300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.086472421884537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.825, + "step": 1650 + }, + { + "loss": 0.0, + "grad_norm": 0.7849677801132202, + "learning_rate": 1.775e-07, + "num_tokens": 1124196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8355000019073486, + "rewards/environment_reward_verifier/std": 0.0007071398431435227, + "reward": 0.8355000019073486, + "reward_std": 0.0007071398431435227, + "kl": 6.0978345572948456e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8255, + "step": 1651 + }, + { + "loss": 0.0, + "grad_norm": 0.0008546906756237149, + "learning_rate": 1.7699999999999998e-07, + "num_tokens": 1124562.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.7396174371242523e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.826, + "step": 1652 + }, + { + "loss": 0.0, + "grad_norm": 1.1525259017944336, + "learning_rate": 1.7649999999999997e-07, + "num_tokens": 1125458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 4.562176764011383e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8265, + "step": 1653 + }, + { + "loss": 0.0, + "grad_norm": 0.0002832186291925609, + "learning_rate": 1.76e-07, + "num_tokens": 1126354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 1.0225921869277954e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.827, + "step": 1654 + }, + { + "loss": 0.0, + "grad_norm": 0.5804024338722229, + "learning_rate": 1.7549999999999998e-07, + "num_tokens": 1127250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.338457852602005e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8275, + "step": 1655 + }, + { + "loss": 0.0, + "grad_norm": 0.6778073906898499, + "learning_rate": 1.75e-07, + "num_tokens": 1128146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.290083259344101e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.828, + "step": 1656 + }, + { + "loss": 0.0, + "grad_norm": 0.8877629637718201, + "learning_rate": 1.7449999999999998e-07, + "num_tokens": 1129042.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.609499990940094, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.609499990940094, + "reward_std": 0.32031938433647156, + "kl": 4.7820620238780975e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8285, + "step": 1657 + }, + { + "loss": 0.0, + "grad_norm": 0.0015010101487860084, + "learning_rate": 1.7399999999999997e-07, + "num_tokens": 1129408.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.316974759101868e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.829, + "step": 1658 + }, + { + "loss": 0.0, + "grad_norm": 0.0008234889828599989, + "learning_rate": 1.7349999999999999e-07, + "num_tokens": 1129774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7329660952091217e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8295, + "step": 1659 + }, + { + "loss": 0.0, + "grad_norm": 0.0008635118720121682, + "learning_rate": 1.7299999999999997e-07, + "num_tokens": 1130140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.4356489777565e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.83, + "step": 1660 + }, + { + "loss": 0.0, + "grad_norm": 0.002669265726581216, + "learning_rate": 1.725e-07, + "num_tokens": 1130506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.825834184885025e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8305, + "step": 1661 + }, + { + "loss": 0.0, + "grad_norm": 0.000953994516748935, + "learning_rate": 1.7199999999999998e-07, + "num_tokens": 1131402.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.698095679283142e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.831, + "step": 1662 + }, + { + "loss": 0.0, + "grad_norm": 1.48069429397583, + "learning_rate": 1.715e-07, + "num_tokens": 1132298.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6019999980926514, + "rewards/environment_reward_verifier/std": 0.3196122944355011, + "reward": 0.6019999980926514, + "reward_std": 0.3196122944355011, + "kl": 5.8494508266448975e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8315, + "step": 1663 + }, + { + "loss": 0.0, + "grad_norm": 0.005689945537596941, + "learning_rate": 1.71e-07, + "num_tokens": 1133194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8159999847412109, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8159999847412109, + "reward_std": 0.0, + "kl": 6.105750799179077e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.832, + "step": 1664 + }, + { + "loss": 0.0, + "grad_norm": 0.001202125335112214, + "learning_rate": 1.705e-07, + "num_tokens": 1133560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.441477358341217e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8325, + "step": 1665 + }, + { + "loss": 0.0, + "grad_norm": 0.0032958821393549442, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1134456.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 7.745064795017242e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.833, + "step": 1666 + }, + { + "loss": 0.0, + "grad_norm": 0.0010330155491828918, + "learning_rate": 1.695e-07, + "num_tokens": 1134822.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.7510727047920227e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8335, + "step": 1667 + }, + { + "loss": 0.0, + "grad_norm": 0.8912146091461182, + "learning_rate": 1.69e-07, + "num_tokens": 1135718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 6.178673356771469e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.834, + "step": 1668 + }, + { + "loss": 0.0, + "grad_norm": 0.0021134400740265846, + "learning_rate": 1.685e-07, + "num_tokens": 1136614.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.889722913503647e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8345, + "step": 1669 + }, + { + "loss": 0.0, + "grad_norm": 0.0008316031889989972, + "learning_rate": 1.68e-07, + "num_tokens": 1137510.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.716256469488144e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.835, + "step": 1670 + }, + { + "loss": 0.0, + "grad_norm": 0.0015585101209580898, + "learning_rate": 1.675e-07, + "num_tokens": 1137876.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.109274595975876e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8355, + "step": 1671 + }, + { + "loss": 0.0, + "grad_norm": 2.0139520168304443, + "learning_rate": 1.67e-07, + "num_tokens": 1138772.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8215000033378601, + "rewards/environment_reward_verifier/std": 0.0021213351283222437, + "reward": 0.8215000033378601, + "reward_std": 0.0021213351283222437, + "kl": 7.60052353143692e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.836, + "step": 1672 + }, + { + "loss": 0.0, + "grad_norm": 0.0027839159592986107, + "learning_rate": 1.665e-07, + "num_tokens": 1139668.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0001286109909415245, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8365, + "step": 1673 + }, + { + "loss": 0.0, + "grad_norm": 0.0005201384774409235, + "learning_rate": 1.66e-07, + "num_tokens": 1140564.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 2.318248152732849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.837, + "step": 1674 + }, + { + "loss": -0.0, + "grad_norm": 0.770577609539032, + "learning_rate": 1.655e-07, + "num_tokens": 1141460.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8029999732971191, + "rewards/environment_reward_verifier/std": 0.012727884575724602, + "reward": 0.8029999732971191, + "reward_std": 0.012727884575724602, + "kl": 4.3759122490882874e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8375, + "step": 1675 + }, + { + "loss": 0.0, + "grad_norm": 0.00833394005894661, + "learning_rate": 1.65e-07, + "num_tokens": 1142356.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.0002732565626502037, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.838, + "step": 1676 + }, + { + "loss": 0.0, + "grad_norm": 0.0025238515809178352, + "learning_rate": 1.645e-07, + "num_tokens": 1142722.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.789116352796555e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8385, + "step": 1677 + }, + { + "loss": 0.0, + "grad_norm": 0.0014516436494886875, + "learning_rate": 1.64e-07, + "num_tokens": 1143088.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0516104996204376e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.839, + "step": 1678 + }, + { + "loss": 0.0, + "grad_norm": 0.005529244430363178, + "learning_rate": 1.635e-07, + "num_tokens": 1143984.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 0.00011143088340759277, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8395, + "step": 1679 + }, + { + "loss": 0.0, + "grad_norm": 0.6549043655395508, + "learning_rate": 1.63e-07, + "num_tokens": 1144880.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 3.060977905988693e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.84, + "step": 1680 + }, + { + "loss": 0.0, + "grad_norm": 0.0004621714761015028, + "learning_rate": 1.625e-07, + "num_tokens": 1145776.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.2720545530319214e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8405, + "step": 1681 + }, + { + "loss": 0.0, + "grad_norm": 0.9856705665588379, + "learning_rate": 1.62e-07, + "num_tokens": 1146672.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 6.997957825660706e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.841, + "step": 1682 + }, + { + "loss": 0.0, + "grad_norm": 0.0017308671958744526, + "learning_rate": 1.615e-07, + "num_tokens": 1147038.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.203019827604294e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8415, + "step": 1683 + }, + { + "loss": 0.0, + "grad_norm": 0.0009688741993159056, + "learning_rate": 1.61e-07, + "num_tokens": 1147404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 9.158626198768616e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.842, + "step": 1684 + }, + { + "loss": 0.0, + "grad_norm": 1.0487639904022217, + "learning_rate": 1.605e-07, + "num_tokens": 1148300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7994999885559082, + "reward_std": 0.04879037290811539, + "kl": 5.657784640789032e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8425, + "step": 1685 + }, + { + "loss": 0.0, + "grad_norm": 0.0018436646787449718, + "learning_rate": 1.6e-07, + "num_tokens": 1149196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 8.995365351438522e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.843, + "step": 1686 + }, + { + "loss": 0.0, + "grad_norm": 0.003820388810709119, + "learning_rate": 1.595e-07, + "num_tokens": 1150092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00010778382420539856, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8435, + "step": 1687 + }, + { + "loss": 0.0, + "grad_norm": 0.0007333682733587921, + "learning_rate": 1.59e-07, + "num_tokens": 1150458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9731000065803528e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.844, + "step": 1688 + }, + { + "loss": 0.0, + "grad_norm": 0.4914136528968811, + "learning_rate": 1.585e-07, + "num_tokens": 1151354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.796999990940094, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.796999990940094, + "reward_std": 0.01272792648524046, + "kl": 7.97836109995842e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8445, + "step": 1689 + }, + { + "loss": 0.0, + "grad_norm": 0.0016368223587051034, + "learning_rate": 1.5799999999999999e-07, + "num_tokens": 1152250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.654525011777878e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.845, + "step": 1690 + }, + { + "loss": 0.0, + "grad_norm": 0.0020018748473376036, + "learning_rate": 1.575e-07, + "num_tokens": 1152616.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.850273787975311e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8455, + "step": 1691 + }, + { + "loss": 0.0, + "grad_norm": 0.0017474376363679767, + "learning_rate": 1.57e-07, + "num_tokens": 1152982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.674440085887909e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.846, + "step": 1692 + }, + { + "loss": 0.0, + "grad_norm": 0.0006785112200304866, + "learning_rate": 1.565e-07, + "num_tokens": 1153348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.52649188041687e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8465, + "step": 1693 + }, + { + "loss": 0.0, + "grad_norm": 0.8353944420814514, + "learning_rate": 1.56e-07, + "num_tokens": 1154244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 7.278099656105042e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.847, + "step": 1694 + }, + { + "loss": 0.0, + "grad_norm": 0.7937394976615906, + "learning_rate": 1.5549999999999998e-07, + "num_tokens": 1155140.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8259999752044678, + "rewards/environment_reward_verifier/std": 0.01272792648524046, + "reward": 0.8259999752044678, + "reward_std": 0.01272792648524046, + "kl": 3.0454248189926147e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8475, + "step": 1695 + }, + { + "loss": 0.0, + "grad_norm": 0.0003463807515799999, + "learning_rate": 1.55e-07, + "num_tokens": 1155506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.183741450309753e-06, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.848, + "step": 1696 + }, + { + "loss": 0.0, + "grad_norm": 0.0009108221274800599, + "learning_rate": 1.545e-07, + "num_tokens": 1155872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.233365714550018e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8485, + "step": 1697 + }, + { + "loss": 0.0, + "grad_norm": 0.8065696954727173, + "learning_rate": 1.54e-07, + "num_tokens": 1156768.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5920000076293945, + "rewards/environment_reward_verifier/std": 0.30122748017311096, + "reward": 0.5920000076293945, + "reward_std": 0.30122748017311096, + "kl": 9.134132415056229e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.849, + "step": 1698 + }, + { + "loss": 0.0, + "grad_norm": 0.0026033867616206408, + "learning_rate": 1.535e-07, + "num_tokens": 1157664.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8349999785423279, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8349999785423279, + "reward_std": 0.0, + "kl": 0.00015535764396190643, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8495, + "step": 1699 + }, + { + "loss": 0.0, + "grad_norm": 0.0007585044368170202, + "learning_rate": 1.5299999999999998e-07, + "num_tokens": 1158560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7849338948726654e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.85, + "step": 1700 + }, + { + "loss": 0.0, + "grad_norm": 0.002312328899279237, + "learning_rate": 1.525e-07, + "num_tokens": 1158926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.916893810033798e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8505, + "step": 1701 + }, + { + "loss": 0.0, + "grad_norm": 0.00042824094998650253, + "learning_rate": 1.5199999999999998e-07, + "num_tokens": 1159822.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.4728477001190186e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.851, + "step": 1702 + }, + { + "loss": 0.0, + "grad_norm": 0.0008439371013082564, + "learning_rate": 1.515e-07, + "num_tokens": 1160718.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 4.475284367799759e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8515, + "step": 1703 + }, + { + "loss": 0.0, + "grad_norm": 0.0011333145666867495, + "learning_rate": 1.51e-07, + "num_tokens": 1161084.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.541726619005203e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.852, + "step": 1704 + }, + { + "loss": 0.0, + "grad_norm": 0.0006239201175048947, + "learning_rate": 1.5049999999999998e-07, + "num_tokens": 1161980.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 2.501765266060829e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8525, + "step": 1705 + }, + { + "loss": 0.0, + "grad_norm": 0.005729427561163902, + "learning_rate": 1.5e-07, + "num_tokens": 1162346.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 0.00014315079897642136, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.853, + "step": 1706 + }, + { + "loss": 0.0, + "grad_norm": 0.0006242716335691512, + "learning_rate": 1.4949999999999998e-07, + "num_tokens": 1162712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.430751919746399e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8535, + "step": 1707 + }, + { + "loss": 0.0, + "grad_norm": 0.8198180794715881, + "learning_rate": 1.49e-07, + "num_tokens": 1163608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 7.086340337991714e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.854, + "step": 1708 + }, + { + "loss": 0.0, + "grad_norm": 0.9060729146003723, + "learning_rate": 1.4849999999999999e-07, + "num_tokens": 1164504.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 4.623178392648697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8545, + "step": 1709 + }, + { + "loss": 0.0, + "grad_norm": 0.7695682644844055, + "learning_rate": 1.4799999999999998e-07, + "num_tokens": 1165400.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 7.752608507871628e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.855, + "step": 1710 + }, + { + "loss": 0.0, + "grad_norm": 1.0271371603012085, + "learning_rate": 1.475e-07, + "num_tokens": 1166296.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.843500018119812, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.843500018119812, + "reward_std": 0.016263457015156746, + "kl": 4.950445145368576e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8555, + "step": 1711 + }, + { + "loss": 0.0, + "grad_norm": 0.0006063416949473321, + "learning_rate": 1.4699999999999998e-07, + "num_tokens": 1167192.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.437325358390808e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.856, + "step": 1712 + }, + { + "loss": 0.0, + "grad_norm": 0.001116525148972869, + "learning_rate": 1.465e-07, + "num_tokens": 1167558.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.72264364361763e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8565, + "step": 1713 + }, + { + "loss": 0.0, + "grad_norm": 0.0012593928258866072, + "learning_rate": 1.4599999999999998e-07, + "num_tokens": 1167924.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.567353218793869e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.857, + "step": 1714 + }, + { + "loss": 0.0, + "grad_norm": 0.7782901525497437, + "learning_rate": 1.4549999999999997e-07, + "num_tokens": 1168820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 5.462951958179474e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8575, + "step": 1715 + }, + { + "loss": 0.0, + "grad_norm": 0.002288342686370015, + "learning_rate": 1.45e-07, + "num_tokens": 1169716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 8.028000593185425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.858, + "step": 1716 + }, + { + "loss": 0.0, + "grad_norm": 0.0010321326553821564, + "learning_rate": 1.4449999999999998e-07, + "num_tokens": 1170612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8140000104904175, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8140000104904175, + "reward_std": 0.0, + "kl": 4.060007631778717e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8585, + "step": 1717 + }, + { + "loss": 0.0, + "grad_norm": 0.7346194386482239, + "learning_rate": 1.44e-07, + "num_tokens": 1171508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 2.4116598069667816e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.859, + "step": 1718 + }, + { + "loss": 0.0, + "grad_norm": 0.0014648967189714313, + "learning_rate": 1.4349999999999998e-07, + "num_tokens": 1172404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.110205918550491e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8595, + "step": 1719 + }, + { + "loss": 0.0, + "grad_norm": 0.004332505166530609, + "learning_rate": 1.4299999999999997e-07, + "num_tokens": 1173300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.0547532737255096e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.86, + "step": 1720 + }, + { + "loss": 0.0, + "grad_norm": 0.0006606621900573373, + "learning_rate": 1.4249999999999999e-07, + "num_tokens": 1174196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 2.1940097212791443e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8605, + "step": 1721 + }, + { + "loss": 0.0, + "grad_norm": 0.0031862056348472834, + "learning_rate": 1.4199999999999997e-07, + "num_tokens": 1175092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 5.89834526181221e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.861, + "step": 1722 + }, + { + "loss": 0.0, + "grad_norm": 0.000561385415494442, + "learning_rate": 1.415e-07, + "num_tokens": 1175458.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.7856789529323578e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8615, + "step": 1723 + }, + { + "loss": 0.0, + "grad_norm": 0.8007268905639648, + "learning_rate": 1.4099999999999998e-07, + "num_tokens": 1176354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31607675552368164, + "reward": 0.5995000004768372, + "reward_std": 0.31607675552368164, + "kl": 8.418131619691849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.862, + "step": 1724 + }, + { + "loss": 0.0, + "grad_norm": 0.0013896668097004294, + "learning_rate": 1.4050000000000002e-07, + "num_tokens": 1176720.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7703121304512024e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8625, + "step": 1725 + }, + { + "loss": 0.0, + "grad_norm": 0.0015918755671009421, + "learning_rate": 1.4e-07, + "num_tokens": 1177616.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.09386882185936e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.863, + "step": 1726 + }, + { + "loss": 0.0, + "grad_norm": 0.0008370818104594946, + "learning_rate": 1.395e-07, + "num_tokens": 1177982.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.082266241312027e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8635, + "step": 1727 + }, + { + "loss": 0.0, + "grad_norm": 0.001225637854076922, + "learning_rate": 1.3900000000000001e-07, + "num_tokens": 1178878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8059999942779541, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8059999942779541, + "reward_std": 0.0, + "kl": 4.492839798331261e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.864, + "step": 1728 + }, + { + "loss": 0.0, + "grad_norm": 0.0013102650409564376, + "learning_rate": 1.385e-07, + "num_tokens": 1179774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 6.482191383838654e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8645, + "step": 1729 + }, + { + "loss": 0.0, + "grad_norm": 0.9065403938293457, + "learning_rate": 1.3800000000000002e-07, + "num_tokens": 1180670.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 8.664838969707489e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.865, + "step": 1730 + }, + { + "loss": 0.0, + "grad_norm": 0.0009610215201973915, + "learning_rate": 1.375e-07, + "num_tokens": 1181036.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.251021891832352e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8655, + "step": 1731 + }, + { + "loss": 0.0, + "grad_norm": 0.0009383897413499653, + "learning_rate": 1.37e-07, + "num_tokens": 1181932.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6188790798187256e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.866, + "step": 1732 + }, + { + "loss": 0.0, + "grad_norm": 0.0013004555366933346, + "learning_rate": 1.365e-07, + "num_tokens": 1182828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.170105189085007e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8665, + "step": 1733 + }, + { + "loss": 0.0, + "grad_norm": 0.0008560972637496889, + "learning_rate": 1.36e-07, + "num_tokens": 1183194.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3237429559230804e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.867, + "step": 1734 + }, + { + "loss": 0.0, + "grad_norm": 0.000858226849231869, + "learning_rate": 1.3550000000000002e-07, + "num_tokens": 1183560.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.406591713428497e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8675, + "step": 1735 + }, + { + "loss": 0.0, + "grad_norm": 0.0009745972929522395, + "learning_rate": 1.35e-07, + "num_tokens": 1183926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2455118596553802e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.868, + "step": 1736 + }, + { + "loss": 0.0, + "grad_norm": 0.001205791486427188, + "learning_rate": 1.345e-07, + "num_tokens": 1184292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.4463202357292175e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8685, + "step": 1737 + }, + { + "loss": 0.0, + "grad_norm": 0.000825030030682683, + "learning_rate": 1.34e-07, + "num_tokens": 1185188.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.240443766117096e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.869, + "step": 1738 + }, + { + "loss": 0.0, + "grad_norm": 0.0009022785816341639, + "learning_rate": 1.335e-07, + "num_tokens": 1185554.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7677975594997406e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8695, + "step": 1739 + }, + { + "loss": 0.0, + "grad_norm": 0.0007139133522287011, + "learning_rate": 1.33e-07, + "num_tokens": 1185920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8228387236595154e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.87, + "step": 1740 + }, + { + "loss": 0.0, + "grad_norm": 0.6013137698173523, + "learning_rate": 1.325e-07, + "num_tokens": 1186816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 5.251821130514145e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8705, + "step": 1741 + }, + { + "loss": 0.0, + "grad_norm": 1.030862808227539, + "learning_rate": 1.32e-07, + "num_tokens": 1187712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00021289847791194916, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.871, + "step": 1742 + }, + { + "loss": 0.0, + "grad_norm": 0.402322381734848, + "learning_rate": 1.315e-07, + "num_tokens": 1188608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 1.122988760471344e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8715, + "step": 1743 + }, + { + "loss": 0.0, + "grad_norm": 0.8741965293884277, + "learning_rate": 1.31e-07, + "num_tokens": 1189504.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7860000133514404, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7860000133514404, + "reward_std": 0.04808327555656433, + "kl": 6.223050877451897e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.872, + "step": 1744 + }, + { + "loss": 0.0, + "grad_norm": 0.0013798903673887253, + "learning_rate": 1.305e-07, + "num_tokens": 1189870.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.7256238758563995e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8725, + "step": 1745 + }, + { + "loss": 0.0, + "grad_norm": 0.0009432470542378724, + "learning_rate": 1.3e-07, + "num_tokens": 1190236.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.60291451215744e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.873, + "step": 1746 + }, + { + "loss": 0.0, + "grad_norm": 0.0011539016850292683, + "learning_rate": 1.295e-07, + "num_tokens": 1190602.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.1274895668029785e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8735, + "step": 1747 + }, + { + "loss": 0.0, + "grad_norm": 0.001130102900788188, + "learning_rate": 1.29e-07, + "num_tokens": 1190968.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.297176539897919e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.874, + "step": 1748 + }, + { + "loss": 0.0, + "grad_norm": 0.9825541377067566, + "learning_rate": 1.285e-07, + "num_tokens": 1191864.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 0.00011297408491373062, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8745, + "step": 1749 + }, + { + "loss": 0.0, + "grad_norm": 0.0009724145638756454, + "learning_rate": 1.28e-07, + "num_tokens": 1192230.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.585498780012131e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.875, + "step": 1750 + }, + { + "loss": 0.0, + "grad_norm": 0.744745135307312, + "learning_rate": 1.275e-07, + "num_tokens": 1193126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 4.145503044128418e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8755, + "step": 1751 + }, + { + "loss": 0.0, + "grad_norm": 0.0012472629314288497, + "learning_rate": 1.2699999999999999e-07, + "num_tokens": 1194022.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.692748188972473e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.876, + "step": 1752 + }, + { + "loss": 0.0, + "grad_norm": 0.0012303896946832538, + "learning_rate": 1.265e-07, + "num_tokens": 1194918.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.751832991838455e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8765, + "step": 1753 + }, + { + "loss": 0.0, + "grad_norm": 0.0018947335192933679, + "learning_rate": 1.26e-07, + "num_tokens": 1195814.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 0.00010034628212451935, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.877, + "step": 1754 + }, + { + "loss": 0.0, + "grad_norm": 0.0010893162107095122, + "learning_rate": 1.255e-07, + "num_tokens": 1196180.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.170819789171219e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8775, + "step": 1755 + }, + { + "loss": 0.0, + "grad_norm": 0.9734063148498535, + "learning_rate": 1.25e-07, + "num_tokens": 1197076.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 0.00011194124817848206, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.878, + "step": 1756 + }, + { + "loss": 0.0, + "grad_norm": 0.0008023467962630093, + "learning_rate": 1.2449999999999998e-07, + "num_tokens": 1197972.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 5.610659718513489e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8785, + "step": 1757 + }, + { + "loss": 0.0, + "grad_norm": 0.0008229869999922812, + "learning_rate": 1.24e-07, + "num_tokens": 1198338.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.3774802684783936e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.879, + "step": 1758 + }, + { + "loss": 0.0, + "grad_norm": 0.7385565638542175, + "learning_rate": 1.235e-07, + "num_tokens": 1199234.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 5.881208926439285e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8795, + "step": 1759 + }, + { + "loss": 0.0, + "grad_norm": 0.003982287831604481, + "learning_rate": 1.23e-07, + "num_tokens": 1199600.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.9475190937519073e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.88, + "step": 1760 + }, + { + "loss": 0.0, + "grad_norm": 0.0010875341249629855, + "learning_rate": 1.225e-07, + "num_tokens": 1199966.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.367716610431671e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8805, + "step": 1761 + }, + { + "loss": 0.0, + "grad_norm": 0.948522686958313, + "learning_rate": 1.2199999999999998e-07, + "num_tokens": 1200862.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 4.6215951442718506e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.881, + "step": 1762 + }, + { + "loss": 0.0, + "grad_norm": 0.7658970355987549, + "learning_rate": 1.215e-07, + "num_tokens": 1201758.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.03111271932721138, + "reward": 0.828000009059906, + "reward_std": 0.03111271932721138, + "kl": 4.916219040751457e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8815, + "step": 1763 + }, + { + "loss": 0.0, + "grad_norm": 0.0008914874633774161, + "learning_rate": 1.2099999999999998e-07, + "num_tokens": 1202654.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.2026710212230682e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.882, + "step": 1764 + }, + { + "loss": 0.0, + "grad_norm": 1.5070701837539673, + "learning_rate": 1.205e-07, + "num_tokens": 1203550.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8335000276565552, + "rewards/environment_reward_verifier/std": 0.030405621975660324, + "reward": 0.8335000276565552, + "reward_std": 0.030405621975660324, + "kl": 7.315631955862045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8825, + "step": 1765 + }, + { + "loss": 0.0, + "grad_norm": 0.0008635977865196764, + "learning_rate": 1.2e-07, + "num_tokens": 1203916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9083341360092163e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.883, + "step": 1766 + }, + { + "loss": -0.0, + "grad_norm": 0.9672502279281616, + "learning_rate": 1.1949999999999998e-07, + "num_tokens": 1204812.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8209999799728394, + "rewards/environment_reward_verifier/std": 0.0014142375439405441, + "reward": 0.8209999799728394, + "reward_std": 0.0014142375439405441, + "kl": 7.252860814332962e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8835, + "step": 1767 + }, + { + "loss": 0.0, + "grad_norm": 0.0015731449238955975, + "learning_rate": 1.19e-07, + "num_tokens": 1205178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.626065492630005e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.884, + "step": 1768 + }, + { + "loss": 0.0, + "grad_norm": 0.006920692976564169, + "learning_rate": 1.1849999999999998e-07, + "num_tokens": 1206074.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.00015255529433488846, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8845, + "step": 1769 + }, + { + "loss": 0.0, + "grad_norm": 0.6253349781036377, + "learning_rate": 1.1799999999999998e-07, + "num_tokens": 1206970.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8255000114440918, + "rewards/environment_reward_verifier/std": 0.0035355305299162865, + "reward": 0.8255000114440918, + "reward_std": 0.0035355305299162865, + "kl": 9.524449706077576e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.885, + "step": 1770 + }, + { + "loss": 0.0, + "grad_norm": 0.0009710108279250562, + "learning_rate": 1.1749999999999999e-07, + "num_tokens": 1207336.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.65767627954483e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8855, + "step": 1771 + }, + { + "loss": 0.0, + "grad_norm": 0.0021219495683908463, + "learning_rate": 1.17e-07, + "num_tokens": 1208232.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 5.154218524694443e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.886, + "step": 1772 + }, + { + "loss": 0.0, + "grad_norm": 0.8564634919166565, + "learning_rate": 1.165e-07, + "num_tokens": 1209128.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8289999961853027, + "rewards/environment_reward_verifier/std": 0.0014141954015940428, + "reward": 0.8289999961853027, + "reward_std": 0.0014141954015940428, + "kl": 5.968846380710602e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8865, + "step": 1773 + }, + { + "loss": 0.0, + "grad_norm": 0.0014013515319675207, + "learning_rate": 1.16e-07, + "num_tokens": 1209494.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.6672536730766296e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.887, + "step": 1774 + }, + { + "loss": 0.0, + "grad_norm": 0.0010544674005359411, + "learning_rate": 1.155e-07, + "num_tokens": 1209860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.4714117646217346e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8875, + "step": 1775 + }, + { + "loss": 0.0, + "grad_norm": 0.0015696323243901134, + "learning_rate": 1.15e-07, + "num_tokens": 1210226.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.527772009372711e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.888, + "step": 1776 + }, + { + "loss": 0.0, + "grad_norm": 0.0011540880659595132, + "learning_rate": 1.145e-07, + "num_tokens": 1210592.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.215724766254425e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8885, + "step": 1777 + }, + { + "loss": 0.0, + "grad_norm": 1.7192362546920776, + "learning_rate": 1.14e-07, + "num_tokens": 1211488.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8009999990463257, + "rewards/environment_reward_verifier/std": 0.049497511237859726, + "reward": 0.8009999990463257, + "reward_std": 0.049497511237859726, + "kl": 0.0004497366026043892, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.889, + "step": 1778 + }, + { + "loss": 0.0, + "grad_norm": 0.7114416360855103, + "learning_rate": 1.135e-07, + "num_tokens": 1212384.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.327204078435898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8895, + "step": 1779 + }, + { + "loss": 0.0, + "grad_norm": 0.0030834779608994722, + "learning_rate": 1.1299999999999999e-07, + "num_tokens": 1213280.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.382999986410141, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.382999986410141, + "reward_std": 0.0, + "kl": 3.078300505876541e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.89, + "step": 1780 + }, + { + "loss": 0.0, + "grad_norm": 0.0007834673160687089, + "learning_rate": 1.125e-07, + "num_tokens": 1214176.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3746473491191864e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8905, + "step": 1781 + }, + { + "loss": 0.0, + "grad_norm": 0.0013525994727388024, + "learning_rate": 1.12e-07, + "num_tokens": 1214542.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.968086093664169e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.891, + "step": 1782 + }, + { + "loss": 0.0, + "grad_norm": 0.0007439209730364382, + "learning_rate": 1.115e-07, + "num_tokens": 1214908.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.4460256099700928e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8915, + "step": 1783 + }, + { + "loss": 0.0, + "grad_norm": 0.005045488942414522, + "learning_rate": 1.11e-07, + "num_tokens": 1215274.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.132945418357849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.892, + "step": 1784 + }, + { + "loss": 0.0, + "grad_norm": 0.009108408354222775, + "learning_rate": 1.1049999999999999e-07, + "num_tokens": 1216170.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8429999947547913, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8429999947547913, + "reward_std": 0.0, + "kl": 0.00012882612645626068, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8925, + "step": 1785 + }, + { + "loss": 0.0, + "grad_norm": 0.0005773335578851402, + "learning_rate": 1.0999999999999999e-07, + "num_tokens": 1216536.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.445746213197708e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.893, + "step": 1786 + }, + { + "loss": 0.0, + "grad_norm": 0.0007551417802460492, + "learning_rate": 1.095e-07, + "num_tokens": 1216902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.5650875866413116e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8935, + "step": 1787 + }, + { + "loss": 0.0, + "grad_norm": 0.7837104797363281, + "learning_rate": 1.09e-07, + "num_tokens": 1217798.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 3.842916339635849e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.894, + "step": 1788 + }, + { + "loss": 0.0, + "grad_norm": 0.0007525270921178162, + "learning_rate": 1.085e-07, + "num_tokens": 1218164.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.5322271287441254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8945, + "step": 1789 + }, + { + "loss": 0.0, + "grad_norm": 0.0013598490040749311, + "learning_rate": 1.0799999999999999e-07, + "num_tokens": 1219060.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.739143282175064e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.895, + "step": 1790 + }, + { + "loss": 0.0, + "grad_norm": 0.00262662535533309, + "learning_rate": 1.0749999999999999e-07, + "num_tokens": 1219956.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 0.00010076910257339478, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8955, + "step": 1791 + }, + { + "loss": 0.0, + "grad_norm": 0.0013126698322594166, + "learning_rate": 1.0699999999999999e-07, + "num_tokens": 1220322.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.7869980335235596e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.896, + "step": 1792 + }, + { + "loss": 0.0, + "grad_norm": 0.001081117196008563, + "learning_rate": 1.065e-07, + "num_tokens": 1221218.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 2.1208077669143677e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8965, + "step": 1793 + }, + { + "loss": 0.0, + "grad_norm": 0.000714861205779016, + "learning_rate": 1.06e-07, + "num_tokens": 1221584.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.1541491150856018e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.897, + "step": 1794 + }, + { + "loss": 0.0, + "grad_norm": 0.7797353267669678, + "learning_rate": 1.0549999999999999e-07, + "num_tokens": 1222480.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 6.227241829037666e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8975, + "step": 1795 + }, + { + "loss": 0.0, + "grad_norm": 0.0013363354373723269, + "learning_rate": 1.0499999999999999e-07, + "num_tokens": 1222846.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.7909095883369446e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.898, + "step": 1796 + }, + { + "loss": 0.0, + "grad_norm": 0.006508568301796913, + "learning_rate": 1.0449999999999999e-07, + "num_tokens": 1223212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 9.324029088020325e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8985, + "step": 1797 + }, + { + "loss": 0.0, + "grad_norm": 0.0008671290124766529, + "learning_rate": 1.0399999999999999e-07, + "num_tokens": 1223578.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7660280466079712e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.899, + "step": 1798 + }, + { + "loss": 0.0, + "grad_norm": 0.7294493913650513, + "learning_rate": 1.035e-07, + "num_tokens": 1224474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 0.0001232502982020378, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.8995, + "step": 1799 + }, + { + "loss": 0.0, + "grad_norm": 0.6453281044960022, + "learning_rate": 1.03e-07, + "num_tokens": 1225370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 5.937553942203522e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9, + "step": 1800 + }, + { + "loss": 0.0, + "grad_norm": 0.0010641550179570913, + "learning_rate": 1.0249999999999998e-07, + "num_tokens": 1225736.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.911050200462341e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9005, + "step": 1801 + }, + { + "loss": 0.0, + "grad_norm": 0.8502619862556458, + "learning_rate": 1.0199999999999999e-07, + "num_tokens": 1226632.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5744999647140503, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5744999647140503, + "reward_std": 0.27082186937332153, + "kl": 7.08606094121933e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.901, + "step": 1802 + }, + { + "loss": 0.0, + "grad_norm": 0.0008172534871846437, + "learning_rate": 1.015e-07, + "num_tokens": 1226998.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.078486770391464e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9015, + "step": 1803 + }, + { + "loss": 0.0, + "grad_norm": 0.0015257395571097732, + "learning_rate": 1.01e-07, + "num_tokens": 1227894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.626507431268692e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.902, + "step": 1804 + }, + { + "loss": 0.0, + "grad_norm": 0.9941185712814331, + "learning_rate": 1.005e-07, + "num_tokens": 1228790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.308430314064026e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9025, + "step": 1805 + }, + { + "loss": 0.0, + "grad_norm": 0.8335599303245544, + "learning_rate": 1e-07, + "num_tokens": 1229686.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 4.629790782928467e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.903, + "step": 1806 + }, + { + "loss": 0.0, + "grad_norm": 0.0008063720306381583, + "learning_rate": 9.95e-08, + "num_tokens": 1230582.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.995094448328018e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9035, + "step": 1807 + }, + { + "loss": 0.0, + "grad_norm": 0.0029422007501125336, + "learning_rate": 9.9e-08, + "num_tokens": 1230948.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.519561141729355e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.904, + "step": 1808 + }, + { + "loss": 0.0, + "grad_norm": 0.0010091759031638503, + "learning_rate": 9.85e-08, + "num_tokens": 1231314.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.390440881252289e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9045, + "step": 1809 + }, + { + "loss": 0.0, + "grad_norm": 0.6486821174621582, + "learning_rate": 9.8e-08, + "num_tokens": 1232210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 4.920735955238342e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.905, + "step": 1810 + }, + { + "loss": 0.0, + "grad_norm": 0.0007820340106263757, + "learning_rate": 9.749999999999999e-08, + "num_tokens": 1232576.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.247987478971481e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9055, + "step": 1811 + }, + { + "loss": 0.0, + "grad_norm": 0.0016294894739985466, + "learning_rate": 9.7e-08, + "num_tokens": 1232942.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.51929697394371e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.906, + "step": 1812 + }, + { + "loss": 0.0, + "grad_norm": 0.9986032843589783, + "learning_rate": 9.65e-08, + "num_tokens": 1233838.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 9.219348430633545e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9065, + "step": 1813 + }, + { + "loss": 0.0, + "grad_norm": 1.9711169004440308, + "learning_rate": 9.6e-08, + "num_tokens": 1234734.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5674999952316284, + "rewards/environment_reward_verifier/std": 0.2708218991756439, + "reward": 0.5674999952316284, + "reward_std": 0.2708218991756439, + "kl": 0.00017576105892658234, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.907, + "step": 1814 + }, + { + "loss": 0.0, + "grad_norm": 0.6360597014427185, + "learning_rate": 9.55e-08, + "num_tokens": 1235630.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 7.921271026134491e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9075, + "step": 1815 + }, + { + "loss": 0.0, + "grad_norm": 0.6892108917236328, + "learning_rate": 9.499999999999999e-08, + "num_tokens": 1236526.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7860000133514404, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7860000133514404, + "reward_std": 0.04808327555656433, + "kl": 6.624776870012283e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.908, + "step": 1816 + }, + { + "loss": 0.0, + "grad_norm": 0.0017434032633900642, + "learning_rate": 9.449999999999999e-08, + "num_tokens": 1236892.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.3535994589328766e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9085, + "step": 1817 + }, + { + "loss": 0.0, + "grad_norm": 0.0027986906934529543, + "learning_rate": 9.4e-08, + "num_tokens": 1237788.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 5.122460424900055e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.909, + "step": 1818 + }, + { + "loss": 0.0, + "grad_norm": 0.0008996524848043919, + "learning_rate": 9.35e-08, + "num_tokens": 1238154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.3515505492687225e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9095, + "step": 1819 + }, + { + "loss": 0.0, + "grad_norm": 0.007405710872262716, + "learning_rate": 9.3e-08, + "num_tokens": 1239050.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8550000190734863, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8550000190734863, + "reward_std": 0.0, + "kl": 0.00010426249355077744, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.91, + "step": 1820 + }, + { + "loss": 0.0, + "grad_norm": 0.0013169284211471677, + "learning_rate": 9.25e-08, + "num_tokens": 1239416.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.415547639131546e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9105, + "step": 1821 + }, + { + "loss": 0.0, + "grad_norm": 0.8002967834472656, + "learning_rate": 9.199999999999999e-08, + "num_tokens": 1240312.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 8.742976933717728e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.911, + "step": 1822 + }, + { + "loss": 0.0, + "grad_norm": 0.8729252219200134, + "learning_rate": 9.149999999999999e-08, + "num_tokens": 1241208.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 7.083360105752945e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9115, + "step": 1823 + }, + { + "loss": 0.0, + "grad_norm": 0.00195197737775743, + "learning_rate": 9.1e-08, + "num_tokens": 1241574.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.8969178199768066e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.912, + "step": 1824 + }, + { + "loss": 0.0, + "grad_norm": 0.0015553674893453717, + "learning_rate": 9.05e-08, + "num_tokens": 1241940.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.8057717978954315e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9125, + "step": 1825 + }, + { + "loss": 0.0, + "grad_norm": 0.0008191480301320553, + "learning_rate": 9e-08, + "num_tokens": 1242306.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3916363716125488e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.913, + "step": 1826 + }, + { + "loss": 0.0, + "grad_norm": 1.2573457956314087, + "learning_rate": 8.949999999999999e-08, + "num_tokens": 1243202.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 6.231758743524551e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9135, + "step": 1827 + }, + { + "loss": 0.0, + "grad_norm": 0.0012659374624490738, + "learning_rate": 8.899999999999999e-08, + "num_tokens": 1243568.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.623776137828827e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.914, + "step": 1828 + }, + { + "loss": 0.0, + "grad_norm": 1.2384027242660522, + "learning_rate": 8.849999999999999e-08, + "num_tokens": 1244464.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8179999589920044, + "rewards/environment_reward_verifier/std": 0.01697055622935295, + "reward": 0.8179999589920044, + "reward_std": 0.01697055622935295, + "kl": 4.41037118434906e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9145, + "step": 1829 + }, + { + "loss": 0.0, + "grad_norm": 0.0020049409940838814, + "learning_rate": 8.8e-08, + "num_tokens": 1245360.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 9.782146662473679e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.915, + "step": 1830 + }, + { + "loss": 0.0, + "grad_norm": 0.0007200397667475045, + "learning_rate": 8.75e-08, + "num_tokens": 1245726.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8675422072410583e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9155, + "step": 1831 + }, + { + "loss": 0.0, + "grad_norm": 0.0017381110228598118, + "learning_rate": 8.699999999999998e-08, + "num_tokens": 1246092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.093511521816254e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.916, + "step": 1832 + }, + { + "loss": 0.0, + "grad_norm": 0.057037509977817535, + "learning_rate": 8.649999999999999e-08, + "num_tokens": 1246988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0009416723623871803, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9165, + "step": 1833 + }, + { + "loss": 0.0, + "grad_norm": 0.002384243067353964, + "learning_rate": 8.599999999999999e-08, + "num_tokens": 1247354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.830638110637665e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.917, + "step": 1834 + }, + { + "loss": 0.0, + "grad_norm": 0.001272529480047524, + "learning_rate": 8.55e-08, + "num_tokens": 1247720.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.187637776136398e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9175, + "step": 1835 + }, + { + "loss": 0.0, + "grad_norm": 0.0014147718902677298, + "learning_rate": 8.500000000000001e-08, + "num_tokens": 1248086.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.632266402244568e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.918, + "step": 1836 + }, + { + "loss": 0.0, + "grad_norm": 0.0008189683430828154, + "learning_rate": 8.45e-08, + "num_tokens": 1248452.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8110109269618988e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9185, + "step": 1837 + }, + { + "loss": 0.0, + "grad_norm": 0.0006520377937704325, + "learning_rate": 8.4e-08, + "num_tokens": 1249348.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.2736919820308685e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.919, + "step": 1838 + }, + { + "loss": 0.0, + "grad_norm": 0.0005913342465646565, + "learning_rate": 8.35e-08, + "num_tokens": 1250244.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.8070993721485138e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9195, + "step": 1839 + }, + { + "loss": 0.0, + "grad_norm": 0.006336219143122435, + "learning_rate": 8.3e-08, + "num_tokens": 1250610.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.033239722251892e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.92, + "step": 1840 + }, + { + "loss": 0.0, + "grad_norm": 1.074285626411438, + "learning_rate": 8.25e-08, + "num_tokens": 1251506.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8105000257492065, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8105000257492065, + "reward_std": 0.06434673070907593, + "kl": 3.837980329990387e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9205, + "step": 1841 + }, + { + "loss": 0.0, + "grad_norm": 0.001576212584041059, + "learning_rate": 8.2e-08, + "num_tokens": 1251872.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.595518112182617e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.921, + "step": 1842 + }, + { + "loss": 0.0, + "grad_norm": 0.0022003604099154472, + "learning_rate": 8.15e-08, + "num_tokens": 1252238.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.256384611129761e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9215, + "step": 1843 + }, + { + "loss": 0.0, + "grad_norm": 0.9301549196243286, + "learning_rate": 8.1e-08, + "num_tokens": 1253134.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 5.9351325035095215e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.922, + "step": 1844 + }, + { + "loss": 0.0, + "grad_norm": 0.012174203991889954, + "learning_rate": 8.05e-08, + "num_tokens": 1254030.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 3.597419708967209e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9225, + "step": 1845 + }, + { + "loss": 0.0, + "grad_norm": 0.7200810313224792, + "learning_rate": 8e-08, + "num_tokens": 1254926.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.89833003282547e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.923, + "step": 1846 + }, + { + "loss": 0.0, + "grad_norm": 0.003318098606541753, + "learning_rate": 7.95e-08, + "num_tokens": 1255292.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9474496841430664e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9235, + "step": 1847 + }, + { + "loss": 0.0, + "grad_norm": 0.002200285904109478, + "learning_rate": 7.899999999999999e-08, + "num_tokens": 1255658.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 5.21903857588768e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.924, + "step": 1848 + }, + { + "loss": 0.0, + "grad_norm": 0.0008765140664763749, + "learning_rate": 7.85e-08, + "num_tokens": 1256024.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.9000995457172394e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9245, + "step": 1849 + }, + { + "loss": 0.0, + "grad_norm": 0.8187151551246643, + "learning_rate": 7.8e-08, + "num_tokens": 1256920.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 7.206853479146957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.925, + "step": 1850 + }, + { + "loss": 0.0, + "grad_norm": 0.5915341973304749, + "learning_rate": 7.75e-08, + "num_tokens": 1257816.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 3.796163946390152e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9255, + "step": 1851 + }, + { + "loss": 0.0, + "grad_norm": 0.7493903040885925, + "learning_rate": 7.7e-08, + "num_tokens": 1258712.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 3.951508551836014e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.926, + "step": 1852 + }, + { + "loss": 0.0, + "grad_norm": 0.0008260611211881042, + "learning_rate": 7.649999999999999e-08, + "num_tokens": 1259608.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7519999742507935, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7519999742507935, + "reward_std": 0.0, + "kl": 4.204269498586655e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9265, + "step": 1853 + }, + { + "loss": 0.0, + "grad_norm": 0.001288191182538867, + "learning_rate": 7.599999999999999e-08, + "num_tokens": 1259974.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.305131733417511e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.927, + "step": 1854 + }, + { + "loss": 0.0, + "grad_norm": 0.6523440480232239, + "learning_rate": 7.55e-08, + "num_tokens": 1260870.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 2.2289343178272247e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9275, + "step": 1855 + }, + { + "loss": 0.0, + "grad_norm": 0.0025584432296454906, + "learning_rate": 7.5e-08, + "num_tokens": 1261766.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00012008380144834518, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.928, + "step": 1856 + }, + { + "loss": 0.0, + "grad_norm": 0.0008006390416994691, + "learning_rate": 7.45e-08, + "num_tokens": 1262662.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.365908145904541e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9285, + "step": 1857 + }, + { + "loss": 0.0, + "grad_norm": 0.0005818059435114264, + "learning_rate": 7.399999999999999e-08, + "num_tokens": 1263028.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.6983598470687866e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.929, + "step": 1858 + }, + { + "loss": 0.0, + "grad_norm": 0.0016558809438720345, + "learning_rate": 7.349999999999999e-08, + "num_tokens": 1263394.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.0668994188308716e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9295, + "step": 1859 + }, + { + "loss": 0.0, + "grad_norm": 0.0012347043957561255, + "learning_rate": 7.299999999999999e-08, + "num_tokens": 1263760.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.45969232916832e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.93, + "step": 1860 + }, + { + "loss": 0.0, + "grad_norm": 0.0007524865795858204, + "learning_rate": 7.25e-08, + "num_tokens": 1264126.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.5850720703601837e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9305, + "step": 1861 + }, + { + "loss": 0.0, + "grad_norm": 0.6033291816711426, + "learning_rate": 7.2e-08, + "num_tokens": 1265022.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 5.55114820599556e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.931, + "step": 1862 + }, + { + "loss": 0.0, + "grad_norm": 0.0034811405930668116, + "learning_rate": 7.149999999999999e-08, + "num_tokens": 1265918.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.800000011920929, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.800000011920929, + "reward_std": 0.0, + "kl": 0.00012871157377958298, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9315, + "step": 1863 + }, + { + "loss": 0.0, + "grad_norm": 0.0007591163157485425, + "learning_rate": 7.099999999999999e-08, + "num_tokens": 1266284.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.487244248390198e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.932, + "step": 1864 + }, + { + "loss": 0.0, + "grad_norm": 0.0011568117188289762, + "learning_rate": 7.049999999999999e-08, + "num_tokens": 1266650.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.824755549430847e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9325, + "step": 1865 + }, + { + "loss": -0.0, + "grad_norm": 0.7718785405158997, + "learning_rate": 7e-08, + "num_tokens": 1267546.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8389999866485596, + "reward_std": 0.01555635966360569, + "kl": 2.6744790375232697e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.933, + "step": 1866 + }, + { + "loss": 0.0, + "grad_norm": 0.7953295111656189, + "learning_rate": 6.950000000000001e-08, + "num_tokens": 1268442.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 5.66607341170311e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9335, + "step": 1867 + }, + { + "loss": 0.0, + "grad_norm": 0.0007461290806531906, + "learning_rate": 6.900000000000001e-08, + "num_tokens": 1268808.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.156198024749756e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.934, + "step": 1868 + }, + { + "loss": 0.0, + "grad_norm": 0.0014013278996571898, + "learning_rate": 6.85e-08, + "num_tokens": 1269704.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 5.7250261306762695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9345, + "step": 1869 + }, + { + "loss": 0.0, + "grad_norm": 0.0008100003469735384, + "learning_rate": 6.8e-08, + "num_tokens": 1270070.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.2807158529758453e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.935, + "step": 1870 + }, + { + "loss": 0.0, + "grad_norm": 0.0006804454606026411, + "learning_rate": 6.75e-08, + "num_tokens": 1270436.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.0500272512435913e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9355, + "step": 1871 + }, + { + "loss": 0.0, + "grad_norm": 0.0013419273309409618, + "learning_rate": 6.7e-08, + "num_tokens": 1271332.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 7.026456296443939e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.936, + "step": 1872 + }, + { + "loss": 0.0, + "grad_norm": 0.0018655994208529592, + "learning_rate": 6.65e-08, + "num_tokens": 1272228.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.878000020980835, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.878000020980835, + "reward_std": 0.0, + "kl": 8.473079651594162e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9365, + "step": 1873 + }, + { + "loss": 0.0, + "grad_norm": 0.0008008715230971575, + "learning_rate": 6.6e-08, + "num_tokens": 1273124.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.729015588760376e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.937, + "step": 1874 + }, + { + "loss": 0.0, + "grad_norm": 0.9609123468399048, + "learning_rate": 6.55e-08, + "num_tokens": 1274020.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 3.089848905801773e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9375, + "step": 1875 + }, + { + "loss": 0.0, + "grad_norm": 1.8508756160736084, + "learning_rate": 6.5e-08, + "num_tokens": 1274916.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 8.919928222894669e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.938, + "step": 1876 + }, + { + "loss": 0.0, + "grad_norm": 0.001092518912628293, + "learning_rate": 6.45e-08, + "num_tokens": 1275282.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.985315561294556e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9385, + "step": 1877 + }, + { + "loss": 0.0, + "grad_norm": 0.0012667548144236207, + "learning_rate": 6.4e-08, + "num_tokens": 1276178.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8320000171661377, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8320000171661377, + "reward_std": 0.0, + "kl": 4.560593515634537e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.939, + "step": 1878 + }, + { + "loss": 0.0, + "grad_norm": 0.0012132265837863088, + "learning_rate": 6.349999999999999e-08, + "num_tokens": 1277074.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8199999928474426, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8199999928474426, + "reward_std": 0.0, + "kl": 6.347894668579102e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9395, + "step": 1879 + }, + { + "loss": 0.0, + "grad_norm": 0.6250314712524414, + "learning_rate": 6.3e-08, + "num_tokens": 1277970.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 4.879012703895569e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.94, + "step": 1880 + }, + { + "loss": 0.0, + "grad_norm": 0.0009681034134700894, + "learning_rate": 6.25e-08, + "num_tokens": 1278336.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.729907959699631e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9405, + "step": 1881 + }, + { + "loss": 0.0, + "grad_norm": 0.0011230476666241884, + "learning_rate": 6.2e-08, + "num_tokens": 1278702.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.889536648988724e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.941, + "step": 1882 + }, + { + "loss": 0.0, + "grad_norm": 0.0014930960023775697, + "learning_rate": 6.15e-08, + "num_tokens": 1279598.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 4.818663001060486e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9415, + "step": 1883 + }, + { + "loss": 0.0, + "grad_norm": 0.7510735392570496, + "learning_rate": 6.099999999999999e-08, + "num_tokens": 1280494.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.274491220712662e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.942, + "step": 1884 + }, + { + "loss": 0.0, + "grad_norm": 0.0020160400308668613, + "learning_rate": 6.049999999999999e-08, + "num_tokens": 1280860.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.4088658392429352e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9425, + "step": 1885 + }, + { + "loss": 0.0, + "grad_norm": 0.0010629004100337625, + "learning_rate": 6e-08, + "num_tokens": 1281756.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8569999933242798, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8569999933242798, + "reward_std": 0.0, + "kl": 5.9262849390506744e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.943, + "step": 1886 + }, + { + "loss": 0.0, + "grad_norm": 0.004243387375026941, + "learning_rate": 5.95e-08, + "num_tokens": 1282652.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00011902675032615662, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9435, + "step": 1887 + }, + { + "loss": 0.0, + "grad_norm": 3.774765729904175, + "learning_rate": 5.899999999999999e-08, + "num_tokens": 1283548.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 0.00014576036483049393, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.944, + "step": 1888 + }, + { + "loss": 0.0, + "grad_norm": 0.6654500961303711, + "learning_rate": 5.85e-08, + "num_tokens": 1284444.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7910000085830688, + "rewards/environment_reward_verifier/std": 0.045254841446876526, + "reward": 0.7910000085830688, + "reward_std": 0.045254841446876526, + "kl": 6.612855941057205e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9445, + "step": 1889 + }, + { + "loss": 0.0, + "grad_norm": 0.8191606402397156, + "learning_rate": 5.8e-08, + "num_tokens": 1285340.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8125, + "rewards/environment_reward_verifier/std": 0.01060659158974886, + "reward": 0.8125, + "reward_std": 0.01060659158974886, + "kl": 3.25273722410202e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.945, + "step": 1890 + }, + { + "loss": -0.0, + "grad_norm": 0.7108575701713562, + "learning_rate": 5.75e-08, + "num_tokens": 1286236.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8209999799728394, + "rewards/environment_reward_verifier/std": 0.0014142375439405441, + "reward": 0.8209999799728394, + "reward_std": 0.0014142375439405441, + "kl": 7.600896060466766e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9455, + "step": 1891 + }, + { + "loss": 0.0, + "grad_norm": 0.0004424000799190253, + "learning_rate": 5.7e-08, + "num_tokens": 1287132.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 2.8070993721485138e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.946, + "step": 1892 + }, + { + "loss": 0.0, + "grad_norm": 0.9523747563362122, + "learning_rate": 5.6499999999999996e-08, + "num_tokens": 1288028.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00021653249859809875, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9465, + "step": 1893 + }, + { + "loss": 0.0, + "grad_norm": 1.4174977540969849, + "learning_rate": 5.6e-08, + "num_tokens": 1288924.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8114999532699585, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8114999532699585, + "reward_std": 0.06434673070907593, + "kl": 4.808790981769562e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.947, + "step": 1894 + }, + { + "loss": 0.0, + "grad_norm": 0.9478350281715393, + "learning_rate": 5.55e-08, + "num_tokens": 1289820.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5985000133514404, + "rewards/environment_reward_verifier/std": 0.30900564789772034, + "reward": 0.5985000133514404, + "reward_std": 0.30900564789772034, + "kl": 8.906051516532898e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9475, + "step": 1895 + }, + { + "loss": 0.0, + "grad_norm": 0.0007437904132530093, + "learning_rate": 5.4999999999999996e-08, + "num_tokens": 1290716.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8230000138282776, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8230000138282776, + "reward_std": 0.0, + "kl": 4.428718239068985e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.948, + "step": 1896 + }, + { + "loss": 0.0, + "grad_norm": 0.7563509941101074, + "learning_rate": 5.45e-08, + "num_tokens": 1291612.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 4.9046240746974945e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9485, + "step": 1897 + }, + { + "loss": 0.0, + "grad_norm": 0.8800461888313293, + "learning_rate": 5.3999999999999994e-08, + "num_tokens": 1292508.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8114999532699585, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.8114999532699585, + "reward_std": 0.06434673070907593, + "kl": 8.416082710027695e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.949, + "step": 1898 + }, + { + "loss": 0.0, + "grad_norm": 0.0013233114732429385, + "learning_rate": 5.3499999999999996e-08, + "num_tokens": 1293404.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.27078115940094e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9495, + "step": 1899 + }, + { + "loss": 0.0, + "grad_norm": 0.0006829975172877312, + "learning_rate": 5.3e-08, + "num_tokens": 1294300.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.519522190093994e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.95, + "step": 1900 + }, + { + "loss": 0.0, + "grad_norm": 0.8179243206977844, + "learning_rate": 5.2499999999999994e-08, + "num_tokens": 1295196.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 6.653927266597748e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9505, + "step": 1901 + }, + { + "loss": 0.0, + "grad_norm": 0.00887332670390606, + "learning_rate": 5.1999999999999996e-08, + "num_tokens": 1296092.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7649999856948853, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7649999856948853, + "reward_std": 0.0, + "kl": 0.00018446799367666245, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.951, + "step": 1902 + }, + { + "loss": 0.0, + "grad_norm": 0.7098538279533386, + "learning_rate": 5.15e-08, + "num_tokens": 1296988.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.0236860513687134e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9515, + "step": 1903 + }, + { + "loss": 0.0, + "grad_norm": 0.0009045878541655838, + "learning_rate": 5.0999999999999993e-08, + "num_tokens": 1297354.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.9223039746284485e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.952, + "step": 1904 + }, + { + "loss": 0.0, + "grad_norm": 0.002537330612540245, + "learning_rate": 5.05e-08, + "num_tokens": 1298250.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 7.463432848453522e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9525, + "step": 1905 + }, + { + "loss": -0.0, + "grad_norm": 0.7880844473838806, + "learning_rate": 5e-08, + "num_tokens": 1299146.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8324999809265137, + "rewards/environment_reward_verifier/std": 0.0007070976425893605, + "reward": 0.8324999809265137, + "reward_std": 0.0007070977007970214, + "kl": 4.231743514537811e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.953, + "step": 1906 + }, + { + "loss": 0.0, + "grad_norm": 0.002435741713270545, + "learning_rate": 4.95e-08, + "num_tokens": 1299512.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.00010286550968885422, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9535, + "step": 1907 + }, + { + "loss": 0.0, + "grad_norm": 0.002487839898094535, + "learning_rate": 4.9e-08, + "num_tokens": 1299878.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.509875386953354e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.954, + "step": 1908 + }, + { + "loss": 0.0, + "grad_norm": 0.6476210951805115, + "learning_rate": 4.85e-08, + "num_tokens": 1300774.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 3.3845193684101105e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9545, + "step": 1909 + }, + { + "loss": 0.0, + "grad_norm": 0.7606059312820435, + "learning_rate": 4.8e-08, + "num_tokens": 1301670.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.0627474188804626e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.955, + "step": 1910 + }, + { + "loss": 0.0, + "grad_norm": 0.0007995399064384401, + "learning_rate": 4.7499999999999995e-08, + "num_tokens": 1302566.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.5949440896511078e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9555, + "step": 1911 + }, + { + "loss": 0.0, + "grad_norm": 0.000665718165691942, + "learning_rate": 4.7e-08, + "num_tokens": 1303462.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 4.561152309179306e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.956, + "step": 1912 + }, + { + "loss": 0.0, + "grad_norm": 0.0011164310853928328, + "learning_rate": 4.65e-08, + "num_tokens": 1303828.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.809388726949692e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9565, + "step": 1913 + }, + { + "loss": 0.0, + "grad_norm": 0.0007526192348450422, + "learning_rate": 4.5999999999999995e-08, + "num_tokens": 1304724.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.3799999952316284, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.3799999952316284, + "reward_std": 0.0, + "kl": 4.663970321416855e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.957, + "step": 1914 + }, + { + "loss": 0.0, + "grad_norm": 0.7351367473602295, + "learning_rate": 4.55e-08, + "num_tokens": 1305620.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 3.9439648389816284e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9575, + "step": 1915 + }, + { + "loss": 0.0, + "grad_norm": 0.0012141538318246603, + "learning_rate": 4.5e-08, + "num_tokens": 1306516.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.472412496805191e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.958, + "step": 1916 + }, + { + "loss": 0.0, + "grad_norm": 0.0013145786942914128, + "learning_rate": 4.4499999999999995e-08, + "num_tokens": 1306882.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.9029714167118073e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9585, + "step": 1917 + }, + { + "loss": 0.0, + "grad_norm": 3.204422950744629, + "learning_rate": 4.4e-08, + "num_tokens": 1307778.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8314999938011169, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8314999938011169, + "reward_std": 0.016263457015156746, + "kl": 7.314607501029968e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.959, + "step": 1918 + }, + { + "loss": 0.0, + "grad_norm": 0.8346698880195618, + "learning_rate": 4.349999999999999e-08, + "num_tokens": 1308674.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8365000486373901, + "rewards/environment_reward_verifier/std": 0.01909189112484455, + "reward": 0.8365000486373901, + "reward_std": 0.01909189112484455, + "kl": 6.764009594917297e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9595, + "step": 1919 + }, + { + "loss": 0.0, + "grad_norm": 0.5773689150810242, + "learning_rate": 4.2999999999999995e-08, + "num_tokens": 1309570.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7935000061988831, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7935000061988831, + "reward_std": 0.04879037290811539, + "kl": 4.458334296941757e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.96, + "step": 1920 + }, + { + "loss": 0.0, + "grad_norm": 1.587773084640503, + "learning_rate": 4.2500000000000003e-08, + "num_tokens": 1310466.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 5.2959658205509186e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9605, + "step": 1921 + }, + { + "loss": 0.0, + "grad_norm": 0.5310774445533752, + "learning_rate": 4.2e-08, + "num_tokens": 1311362.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 2.699345350265503e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.961, + "step": 1922 + }, + { + "loss": 0.0, + "grad_norm": 0.8070924878120422, + "learning_rate": 4.15e-08, + "num_tokens": 1312258.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 3.958679735660553e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9615, + "step": 1923 + }, + { + "loss": 0.0, + "grad_norm": 0.0008922016131691635, + "learning_rate": 4.1e-08, + "num_tokens": 1313154.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.37599998712539673, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.37599998712539673, + "reward_std": 0.0, + "kl": 3.5449862480163574e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.962, + "step": 1924 + }, + { + "loss": 0.0, + "grad_norm": 0.8139249682426453, + "learning_rate": 4.05e-08, + "num_tokens": 1314050.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8500000238418579, + "rewards/environment_reward_verifier/std": 0.039597976952791214, + "reward": 0.8500000238418579, + "reward_std": 0.039597976952791214, + "kl": 5.259457975625992e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9625, + "step": 1925 + }, + { + "loss": 0.0, + "grad_norm": 0.001327203819528222, + "learning_rate": 4e-08, + "num_tokens": 1314416.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.579313099384308e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.963, + "step": 1926 + }, + { + "loss": 0.0, + "grad_norm": 0.5970568656921387, + "learning_rate": 3.9499999999999996e-08, + "num_tokens": 1315312.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.513414412736893e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9635, + "step": 1927 + }, + { + "loss": 0.0, + "grad_norm": 0.6172381043434143, + "learning_rate": 3.9e-08, + "num_tokens": 1316208.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8234999775886536, + "rewards/environment_reward_verifier/std": 0.016263457015156746, + "reward": 0.8234999775886536, + "reward_std": 0.016263457015156746, + "kl": 2.5690533220767975e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.964, + "step": 1928 + }, + { + "loss": 0.0, + "grad_norm": 0.9972390532493591, + "learning_rate": 3.85e-08, + "num_tokens": 1317104.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7860000133514404, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7860000133514404, + "reward_std": 0.04808327555656433, + "kl": 9.79909673333168e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9645, + "step": 1929 + }, + { + "loss": 0.0, + "grad_norm": 0.7970294952392578, + "learning_rate": 3.7999999999999996e-08, + "num_tokens": 1318000.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 3.156159073114395e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.965, + "step": 1930 + }, + { + "loss": 0.0, + "grad_norm": 0.8544671535491943, + "learning_rate": 3.75e-08, + "num_tokens": 1318896.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 5.225185304880142e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9655, + "step": 1931 + }, + { + "loss": 0.0, + "grad_norm": 0.7123236656188965, + "learning_rate": 3.6999999999999994e-08, + "num_tokens": 1319792.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6074999570846558, + "rewards/environment_reward_verifier/std": 0.3217335641384125, + "reward": 0.6074999570846558, + "reward_std": 0.3217335641384125, + "kl": 4.797615110874176e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.966, + "step": 1932 + }, + { + "loss": 0.0, + "grad_norm": 0.0008904593414627016, + "learning_rate": 3.6499999999999996e-08, + "num_tokens": 1320158.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.0052848160266876e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9665, + "step": 1933 + }, + { + "loss": 0.0, + "grad_norm": 0.6745616793632507, + "learning_rate": 3.6e-08, + "num_tokens": 1321054.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.0502045676112175, + "reward": 0.7994999885559082, + "reward_std": 0.0502045676112175, + "kl": 7.80569389462471e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.967, + "step": 1934 + }, + { + "loss": 0.0, + "grad_norm": 0.0012241753283888102, + "learning_rate": 3.5499999999999994e-08, + "num_tokens": 1321420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9836239516735077e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9675, + "step": 1935 + }, + { + "loss": 0.0, + "grad_norm": 0.03447146713733673, + "learning_rate": 3.5e-08, + "num_tokens": 1322316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.000571289099752903, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.968, + "step": 1936 + }, + { + "loss": 0.0, + "grad_norm": 0.0031033242121338844, + "learning_rate": 3.4500000000000005e-08, + "num_tokens": 1323212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8330000042915344, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8330000042915344, + "reward_std": 0.0, + "kl": 0.00013370532542467117, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9685, + "step": 1937 + }, + { + "loss": 0.0, + "grad_norm": 0.7509351968765259, + "learning_rate": 3.4e-08, + "num_tokens": 1324108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.3138319849967957e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.969, + "step": 1938 + }, + { + "loss": 0.0, + "grad_norm": 0.001145522459410131, + "learning_rate": 3.35e-08, + "num_tokens": 1324474.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.9367547035217285e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9695, + "step": 1939 + }, + { + "loss": 0.0, + "grad_norm": 0.6458748579025269, + "learning_rate": 3.3e-08, + "num_tokens": 1325370.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 3.7299469113349915e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.97, + "step": 1940 + }, + { + "loss": 0.0, + "grad_norm": 0.0005989051423966885, + "learning_rate": 3.25e-08, + "num_tokens": 1326266.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.194715827703476e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9705, + "step": 1941 + }, + { + "loss": 0.0, + "grad_norm": 1.0348713397979736, + "learning_rate": 3.2e-08, + "num_tokens": 1327162.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.590999960899353, + "rewards/environment_reward_verifier/std": 0.30405592918395996, + "reward": 0.590999960899353, + "reward_std": 0.30405592918395996, + "kl": 4.017213359475136e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.971, + "step": 1942 + }, + { + "loss": 0.0, + "grad_norm": 0.664190948009491, + "learning_rate": 3.15e-08, + "num_tokens": 1328058.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5995000004768372, + "rewards/environment_reward_verifier/std": 0.31607675552368164, + "reward": 0.5995000004768372, + "reward_std": 0.31607675552368164, + "kl": 5.123857408761978e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9715, + "step": 1943 + }, + { + "loss": 0.0, + "grad_norm": 0.9491040110588074, + "learning_rate": 3.1e-08, + "num_tokens": 1328954.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6175000071525574, + "rewards/environment_reward_verifier/std": 0.3358757495880127, + "reward": 0.6175000071525574, + "reward_std": 0.3358757495880127, + "kl": 6.263516843318939e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.972, + "step": 1944 + }, + { + "loss": 0.0, + "grad_norm": 0.003704255912452936, + "learning_rate": 3.0499999999999995e-08, + "num_tokens": 1329850.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.828000009059906, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.828000009059906, + "reward_std": 0.0, + "kl": 8.243601769208908e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9725, + "step": 1945 + }, + { + "loss": 0.0, + "grad_norm": 0.0016652109334245324, + "learning_rate": 3e-08, + "num_tokens": 1330216.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 6.716791540384293e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.973, + "step": 1946 + }, + { + "loss": 0.0, + "grad_norm": 0.7003143429756165, + "learning_rate": 2.9499999999999996e-08, + "num_tokens": 1331112.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7919999957084656, + "rewards/environment_reward_verifier/std": 0.0381837822496891, + "reward": 0.7919999957084656, + "reward_std": 0.0381837822496891, + "kl": 5.607306957244873e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9735, + "step": 1947 + }, + { + "loss": 0.0, + "grad_norm": 0.0020086613949388266, + "learning_rate": 2.9e-08, + "num_tokens": 1332008.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 9.545870125293732e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.974, + "step": 1948 + }, + { + "loss": 0.0, + "grad_norm": 0.5554416179656982, + "learning_rate": 2.85e-08, + "num_tokens": 1332904.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7994999885559082, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7994999885559082, + "reward_std": 0.04879037290811539, + "kl": 5.0972215831279755e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9745, + "step": 1949 + }, + { + "loss": 0.0, + "grad_norm": 0.9953874349594116, + "learning_rate": 2.8e-08, + "num_tokens": 1333800.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7834999561309814, + "rewards/environment_reward_verifier/std": 0.04454774409532547, + "reward": 0.7834999561309814, + "reward_std": 0.04454774409532547, + "kl": 5.744118243455887e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.975, + "step": 1950 + }, + { + "loss": 0.0, + "grad_norm": 0.001727592432871461, + "learning_rate": 2.7499999999999998e-08, + "num_tokens": 1334166.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 4.033651202917099e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9755, + "step": 1951 + }, + { + "loss": 0.0, + "grad_norm": 0.622600793838501, + "learning_rate": 2.6999999999999997e-08, + "num_tokens": 1335062.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.809499979019165, + "rewards/environment_reward_verifier/std": 0.06434673070907593, + "reward": 0.809499979019165, + "reward_std": 0.06434673070907593, + "kl": 3.692321479320526e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.976, + "step": 1952 + }, + { + "loss": 0.0, + "grad_norm": 0.0006846596952527761, + "learning_rate": 2.65e-08, + "num_tokens": 1335428.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.568121999502182e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9765, + "step": 1953 + }, + { + "loss": 0.0, + "grad_norm": 0.001127120340242982, + "learning_rate": 2.5999999999999998e-08, + "num_tokens": 1335794.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.500135451555252e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.977, + "step": 1954 + }, + { + "loss": 0.0, + "grad_norm": 1.5068713426589966, + "learning_rate": 2.5499999999999997e-08, + "num_tokens": 1336690.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.011313731782138348, + "reward": 0.8149999976158142, + "reward_std": 0.011313731782138348, + "kl": 0.00010407902300357819, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9775, + "step": 1955 + }, + { + "loss": 0.0, + "grad_norm": 0.0013251726049929857, + "learning_rate": 2.5e-08, + "num_tokens": 1337056.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.0050443708896637e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.978, + "step": 1956 + }, + { + "loss": 0.0, + "grad_norm": 0.9759896993637085, + "learning_rate": 2.45e-08, + "num_tokens": 1337952.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 5.472265183925629e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9785, + "step": 1957 + }, + { + "loss": 0.0, + "grad_norm": 0.001991751603782177, + "learning_rate": 2.4e-08, + "num_tokens": 1338318.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7233734726905823e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.979, + "step": 1958 + }, + { + "loss": 0.0, + "grad_norm": 0.7958042025566101, + "learning_rate": 2.35e-08, + "num_tokens": 1339214.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7985000014305115, + "rewards/environment_reward_verifier/std": 0.04879037290811539, + "reward": 0.7985000014305115, + "reward_std": 0.04879037290811539, + "kl": 0.00012979097664356232, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9795, + "step": 1959 + }, + { + "loss": 0.0, + "grad_norm": 1.2444452047348022, + "learning_rate": 2.2999999999999998e-08, + "num_tokens": 1340110.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8149999976158142, + "rewards/environment_reward_verifier/std": 0.004242670256644487, + "reward": 0.8149999976158142, + "reward_std": 0.004242670256644487, + "kl": 6.871577352285385e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.98, + "step": 1960 + }, + { + "loss": 0.0, + "grad_norm": 1.1009396314620972, + "learning_rate": 2.25e-08, + "num_tokens": 1341006.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8170000314712524, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8170000314712524, + "reward_std": 0.01555635966360569, + "kl": 0.00026622507721185684, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9805, + "step": 1961 + }, + { + "loss": 0.0, + "grad_norm": 1.1216737031936646, + "learning_rate": 2.2e-08, + "num_tokens": 1341902.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8450000286102295, + "rewards/environment_reward_verifier/std": 0.014142164029181004, + "reward": 0.8450000286102295, + "reward_std": 0.014142164029181004, + "kl": 0.0002295980229973793, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.981, + "step": 1962 + }, + { + "loss": 0.0, + "grad_norm": 0.001057165558449924, + "learning_rate": 2.1499999999999997e-08, + "num_tokens": 1342268.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.635138273239136e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9815, + "step": 1963 + }, + { + "loss": 0.0, + "grad_norm": 0.0009397657704539597, + "learning_rate": 2.1e-08, + "num_tokens": 1343164.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.765999972820282, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.765999972820282, + "reward_std": 0.0, + "kl": 4.243478178977966e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.982, + "step": 1964 + }, + { + "loss": 0.0, + "grad_norm": 0.002872444223612547, + "learning_rate": 2.05e-08, + "num_tokens": 1343530.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 5.2745454013347626e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9825, + "step": 1965 + }, + { + "loss": 0.0, + "grad_norm": 0.0009532644180580974, + "learning_rate": 2e-08, + "num_tokens": 1343896.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.329066723585129e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.983, + "step": 1966 + }, + { + "loss": 0.0, + "grad_norm": 0.001970401033759117, + "learning_rate": 1.95e-08, + "num_tokens": 1344262.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.7478672564029694e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9835, + "step": 1967 + }, + { + "loss": 0.0, + "grad_norm": 0.8466808795928955, + "learning_rate": 1.8999999999999998e-08, + "num_tokens": 1345158.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.609499990940094, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.609499990940094, + "reward_std": 0.32031938433647156, + "kl": 6.240885704755783e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.984, + "step": 1968 + }, + { + "loss": 0.0, + "grad_norm": 0.7395403385162354, + "learning_rate": 1.8499999999999997e-08, + "num_tokens": 1346054.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7854999899864197, + "rewards/environment_reward_verifier/std": 0.037476640194654465, + "reward": 0.7854999899864197, + "reward_std": 0.037476640194654465, + "kl": 3.7410296499729156e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9845, + "step": 1969 + }, + { + "loss": 0.0, + "grad_norm": 0.005028001964092255, + "learning_rate": 1.8e-08, + "num_tokens": 1346420.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 8.665304630994797e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.985, + "step": 1970 + }, + { + "loss": 0.0, + "grad_norm": 0.7261149883270264, + "learning_rate": 1.75e-08, + "num_tokens": 1347316.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5900000333786011, + "rewards/environment_reward_verifier/std": 0.29698485136032104, + "reward": 0.5900000333786011, + "reward_std": 0.29698485136032104, + "kl": 8.442811667919159e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9855, + "step": 1971 + }, + { + "loss": 0.0, + "grad_norm": 0.0007656632806174457, + "learning_rate": 1.7e-08, + "num_tokens": 1348212.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.391185939311981e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.986, + "step": 1972 + }, + { + "loss": 0.0, + "grad_norm": 1.2559970617294312, + "learning_rate": 1.65e-08, + "num_tokens": 1349108.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 0.00017483532428741455, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9865, + "step": 1973 + }, + { + "loss": 0.0, + "grad_norm": 0.0007610286120325327, + "learning_rate": 1.6e-08, + "num_tokens": 1350004.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 2.6444904506206512e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.987, + "step": 1974 + }, + { + "loss": 0.0, + "grad_norm": 1.5096609592437744, + "learning_rate": 1.55e-08, + "num_tokens": 1350900.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.6065000295639038, + "rewards/environment_reward_verifier/std": 0.32031938433647156, + "reward": 0.6065000295639038, + "reward_std": 0.32031938433647156, + "kl": 6.0974620282649994e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9875, + "step": 1975 + }, + { + "loss": 0.0, + "grad_norm": 0.8040772080421448, + "learning_rate": 1.5e-08, + "num_tokens": 1351796.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5975000262260437, + "rewards/environment_reward_verifier/std": 0.3047630488872528, + "reward": 0.5975000262260437, + "reward_std": 0.3047630488872528, + "kl": 7.442384958267212e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.988, + "step": 1976 + }, + { + "loss": 0.0, + "grad_norm": 0.0008832589373923838, + "learning_rate": 1.45e-08, + "num_tokens": 1352162.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 1.8139369785785675e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9885, + "step": 1977 + }, + { + "loss": 0.0, + "grad_norm": 0.000580662686843425, + "learning_rate": 1.4e-08, + "num_tokens": 1352528.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.3657456040382385e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.989, + "step": 1978 + }, + { + "loss": 0.0, + "grad_norm": 0.0015710809966549277, + "learning_rate": 1.3499999999999998e-08, + "num_tokens": 1352894.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.9046240746974945e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9895, + "step": 1979 + }, + { + "loss": 0.0, + "grad_norm": 1.2286361455917358, + "learning_rate": 1.2999999999999999e-08, + "num_tokens": 1353790.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8389999866485596, + "rewards/environment_reward_verifier/std": 0.055154334753751755, + "reward": 0.8389999866485596, + "reward_std": 0.055154334753751755, + "kl": 0.00014132726937532425, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.99, + "step": 1980 + }, + { + "loss": 0.0, + "grad_norm": 0.000873856944963336, + "learning_rate": 1.25e-08, + "num_tokens": 1354156.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.497488796710968e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9905, + "step": 1981 + }, + { + "loss": 0.0, + "grad_norm": 0.003963265102356672, + "learning_rate": 1.2e-08, + "num_tokens": 1355052.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 0.00016738008707761765, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.991, + "step": 1982 + }, + { + "loss": 0.0, + "grad_norm": 0.0010274512460455298, + "learning_rate": 1.1499999999999999e-08, + "num_tokens": 1355948.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 6.77201896905899e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9915, + "step": 1983 + }, + { + "loss": 0.0, + "grad_norm": 0.0005545667372643948, + "learning_rate": 1.1e-08, + "num_tokens": 1356844.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 3.383960574865341e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.992, + "step": 1984 + }, + { + "loss": 0.0, + "grad_norm": 0.001100558671168983, + "learning_rate": 1.05e-08, + "num_tokens": 1357210.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 4.336796700954437e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9925, + "step": 1985 + }, + { + "loss": 0.0, + "grad_norm": 0.7508660554885864, + "learning_rate": 1e-08, + "num_tokens": 1358106.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 7.212162017822266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.993, + "step": 1986 + }, + { + "loss": 0.0, + "grad_norm": 0.8998424410820007, + "learning_rate": 9.499999999999999e-09, + "num_tokens": 1359002.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8224999904632568, + "rewards/environment_reward_verifier/std": 0.014849262312054634, + "reward": 0.8224999904632568, + "reward_std": 0.014849262312054634, + "kl": 3.0959490686655045e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9935, + "step": 1987 + }, + { + "loss": 0.0, + "grad_norm": 0.0005708038806915283, + "learning_rate": 9e-09, + "num_tokens": 1359368.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.1286308765411377e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.994, + "step": 1988 + }, + { + "loss": 0.0, + "grad_norm": 1.1188461780548096, + "learning_rate": 8.5e-09, + "num_tokens": 1360264.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 0.00014527235180139542, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9945, + "step": 1989 + }, + { + "loss": 0.0, + "grad_norm": 0.5586024522781372, + "learning_rate": 8e-09, + "num_tokens": 1361160.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5959999561309814, + "rewards/environment_reward_verifier/std": 0.3054701089859009, + "reward": 0.5959999561309814, + "reward_std": 0.3054701089859009, + "kl": 3.770552575588226e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.995, + "step": 1990 + }, + { + "loss": 0.0, + "grad_norm": 0.0007088059210218489, + "learning_rate": 7.5e-09, + "num_tokens": 1361526.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 2.6285648345947266e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9955, + "step": 1991 + }, + { + "loss": 0.0, + "grad_norm": 0.00330960750579834, + "learning_rate": 7e-09, + "num_tokens": 1362422.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 0.0001575574278831482, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.996, + "step": 1992 + }, + { + "loss": 0.0, + "grad_norm": 0.916315495967865, + "learning_rate": 6.4999999999999995e-09, + "num_tokens": 1363318.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8109999895095825, + "rewards/environment_reward_verifier/std": 0.01555635966360569, + "reward": 0.8109999895095825, + "reward_std": 0.01555635966360569, + "kl": 0.00013699568808078766, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9965, + "step": 1993 + }, + { + "loss": 0.0, + "grad_norm": 0.6125226020812988, + "learning_rate": 6e-09, + "num_tokens": 1364214.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.5720000267028809, + "rewards/environment_reward_verifier/std": 0.27152901887893677, + "reward": 0.5720000267028809, + "reward_std": 0.27152901887893677, + "kl": 5.8222562074661255e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.997, + "step": 1994 + }, + { + "loss": 0.0, + "grad_norm": 0.001430765725672245, + "learning_rate": 5.5e-09, + "num_tokens": 1364580.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7639999985694885, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7639999985694885, + "reward_std": 0.0, + "kl": 1.9777566194534302e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9975, + "step": 1995 + }, + { + "loss": 0.0, + "grad_norm": 0.0009554658317938447, + "learning_rate": 5e-09, + "num_tokens": 1365476.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7590000033378601, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.7590000033378601, + "reward_std": 0.0, + "kl": 5.196593701839447e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.998, + "step": 1996 + }, + { + "loss": 0.0, + "grad_norm": 0.707953155040741, + "learning_rate": 4.5e-09, + "num_tokens": 1366372.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.7979999780654907, + "rewards/environment_reward_verifier/std": 0.04808327555656433, + "reward": 0.7979999780654907, + "reward_std": 0.04808327555656433, + "kl": 3.2736919820308685e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9985, + "step": 1997 + }, + { + "loss": 0.0, + "grad_norm": 0.0008880810928530991, + "learning_rate": 4e-09, + "num_tokens": 1366738.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.86582687497139e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.999, + "step": 1998 + }, + { + "loss": 0.0, + "grad_norm": 0.0015981695614755154, + "learning_rate": 3.5e-09, + "num_tokens": 1367634.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8130000233650208, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8130000233650208, + "reward_std": 0.0, + "kl": 5.8078207075595856e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 0.9995, + "step": 1999 + }, + { + "loss": 0.0, + "grad_norm": 0.0007903846562840044, + "learning_rate": 3e-09, + "num_tokens": 1368000.0, + "completions/mean_length": 64.0, + "completions/min_length": 64.0, + "completions/max_length": 64.0, + "completions/clipped_ratio": 1.0, + "completions/mean_terminated_length": 0.0, + "completions/min_terminated_length": 0.0, + "completions/max_terminated_length": 0.0, + "rewards/environment_reward_verifier/mean": 0.8119999766349792, + "rewards/environment_reward_verifier/std": 0.0, + "reward": 0.8119999766349792, + "reward_std": 0.0, + "kl": 3.558676689863205e-05, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/high_max": 0.0, + "clip_ratio/region_mean": 0.0, + "epoch": 1.0, + "step": 2000 + }, + { + "train_runtime": 6873.9375, + "train_samples_per_second": 0.291, + "train_steps_per_second": 0.291, + "total_flos": 0.0, + "train_loss": 2.665005830824185e-06, + "epoch": 1.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..87ca8fb39dcfbc92786e290045c1da201ca5d1df --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json @@ -0,0 +1,43 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "records": 2000, + "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", + "reward_summary": { + "count": 4000, + "avg_reward": 0.767, + "avg_reward_components": { + "format_compliance_score": 0.999, + "candidate_alignment_score": 0.999, + "legality_score": 0.929, + "safety_delta_score": 0.497, + "burden_improvement_score": 0.469, + "disease_stability_score": 0.861, + "dosing_quality_score": 0.526, + "abstention_quality_score": 0.56, + "efficiency_score": 0.849, + "process_fidelity_score": 0.856, + "explanation_grounding_score": 0.795, + "anti_cheat_score": 0.589, + "uncertainty_calibration_score": 0.747 + }, + "avg_primary_reward_channels": { + "safety_legality": 0.816, + "clinical_improvement": 0.609, + "dosing_quality": 0.543, + "process_integrity": 0.875 + } + }, + "reward_log": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl", + "train_metrics": { + "train_runtime": 6873.9375, + "train_samples_per_second": 0.291, + "train_steps_per_second": 0.291, + "total_flos": 0.0, + "train_loss": 2.665005830824185e-06 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter", + "unsloth_available": false +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json new file mode 100644 index 0000000000000000000000000000000000000000..6f146ec46e61500fb1904fe354c75dfb860c0700 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "adapter", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.726, + "avg_latency_seconds": 3.681, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 67-year-old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has no significant past medical history. He is a current smoker and drinks", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.941, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 46-year-old female with a history of hypertension, hyperlipidemia, and type 2 diabetes. She has been on metformin for her diabetes and lisinopril for her hypertension. The patient is currently being evaluated for possible coronary artery disease (CAD) due to symptoms of", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.703, + "latency_seconds": 3.634, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 65 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or emergency department visits. His most", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.636, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 67 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or emergency department visits. His most", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.777, + "latency_seconds": 3.548, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "{\"candidate_id\": \"cand_01\", \"rationale\": \"The patient is a 57 year old male with a history of hypertension, hyperlipidemia, and type 2 diabetes. He has been on metformin for 10 years and lisinopril for 5 years. He has not had any recent hospitalizations or surgeries. His most recent A", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 3.647, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + } + ] +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json new file mode 100644 index 0000000000000000000000000000000000000000..ca3ec176b1d1c070155376b509f617e30945a687 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json @@ -0,0 +1,71 @@ +{ + "status": "ok", + "model_source": "merged", + "model_load_error": "", + "samples": 5, + "valid_rate": 1.0, + "avg_env_reward": 0.781, + "avg_latency_seconds": 2.863, + "results": [ + { + "idx": 0, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "=> { \"decision\": {\"candidate_id\": \"cand_08\", \"reason_score\": 10, \"format\": \"candidate_id=cand_08;rationale=;\"}, \"patients\": [ \"patient_8000\" ] } {\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 3.285, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 1, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "; {!!$instruction!!}; {!!$patient_id!!}; {!!$candidate_ids[$rand(0, $candidate_count-1)]!!}; {!!\"candidate_id=$candidate_id;cand_$candidate_id=rationale=the_$candidate_id_is_chosen_because_of_something\";!!}; patient_8001; candidate_id=cand_08;cand_08=rationale", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.793, + "latency_seconds": 2.691, + "done": false, + "valid": true, + "termination_reason": "ongoing" + }, + { + "idx": 2, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "=> { \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"url=cand_01\"], \"format\": \"candidate_id=url_cand_01; rationale: This is a question about the 2016 US presidential election, where Hillary Clinton was the Democratic nominee. Her campaign website can be found at this url.\" }\nExplanation by model: The", + "candidate_id": "cand_01", + "selected_candidate": "cand_01", + "env_reward": 0.717, + "latency_seconds": 2.727, + "done": true, + "valid": true, + "termination_reason": "exploit_detection" + }, + { + "idx": 3, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "=> { \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"url=candidate_name&candidate_id={cand_09}&rationale=The+rationale+for+choosing+cand_09+is+that+it+contains+a+url+which+is+a+common+sign+of+potentially+malicious+content", + "candidate_id": "cand_09", + "selected_candidate": "cand_09", + "env_reward": 0.784, + "latency_seconds": 2.83, + "done": true, + "valid": true, + "termination_reason": "justified_review_escalation" + }, + { + "idx": 4, + "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=; rationale=\"}", + "completion": "=> { \"candidate_id\": \"cand_08\", \"rationale\": \"The patient mentions a 'doctor' and 'prescription', which is consistent with the need for a 'medicine prescription'. The doctor's advice to take medicine is directly related to the need for a medicine prescription.\" }\n\u6559\u5bfc\uff1a\u5728\u56de\u7b54\u54ea\u4e2a\u5019\u9009\u9879\u6700\u6709\u53ef\u80fd\u6210\u529f\u65f6\uff0c\u63d0\u4f9b\u4e00\u4e2a\u5408\u7406\u7684rationale\u6bb5\u843d", + "candidate_id": "cand_08", + "selected_candidate": "cand_08", + "env_reward": 0.806, + "latency_seconds": 2.782, + "done": false, + "valid": true, + "termination_reason": "ongoing" + } + ] +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..577d040703282034a876241e57b4e60351835b78 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json @@ -0,0 +1,9 @@ +{ + "training_mode": "full", + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "model_index": 0, + "sft_epochs": 2, + "sft_max_steps": 0, + "sft_batch_size": 2, + "sft_learning_rate": 2e-05 +} \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json new file mode 100644 index 0000000000000000000000000000000000000000..f758dfb2665830f1438ea48cde077992c954c848 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json @@ -0,0 +1,18011 @@ +[ + { + "loss": 3.5687, + "grad_norm": NaN, + "learning_rate": 2e-05, + "num_tokens": 182.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.001, + "step": 1 + }, + { + "loss": 1.6305, + "grad_norm": 1.434348702430725, + "learning_rate": 2e-05, + "num_tokens": 785.0, + "mean_token_accuracy": 0.7387686967849731, + "epoch": 0.002, + "step": 2 + }, + { + "loss": 1.0453, + "grad_norm": 0.8542668223381042, + "learning_rate": 1.9990000000000003e-05, + "num_tokens": 1809.0, + "mean_token_accuracy": 0.8111546039581299, + "epoch": 0.003, + "step": 3 + }, + { + "loss": 3.5283, + "grad_norm": NaN, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 1991.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.004, + "step": 4 + }, + { + "loss": 1.0695, + "grad_norm": 0.7922297716140747, + "learning_rate": 1.9980000000000002e-05, + "num_tokens": 3015.0, + "mean_token_accuracy": 0.8091976642608643, + "epoch": 0.005, + "step": 5 + }, + { + "loss": 1.5782, + "grad_norm": 1.3316136598587036, + "learning_rate": 1.9970000000000004e-05, + "num_tokens": 3618.0, + "mean_token_accuracy": 0.7504159808158875, + "epoch": 0.006, + "step": 6 + }, + { + "loss": 1.5577, + "grad_norm": 1.1409932374954224, + "learning_rate": 1.9960000000000002e-05, + "num_tokens": 4221.0, + "mean_token_accuracy": 0.742096483707428, + "epoch": 0.007, + "step": 7 + }, + { + "loss": 1.0424, + "grad_norm": 0.6543182134628296, + "learning_rate": 1.9950000000000004e-05, + "num_tokens": 5245.0, + "mean_token_accuracy": 0.8101761341094971, + "epoch": 0.008, + "step": 8 + }, + { + "loss": 1.2472, + "grad_norm": 0.7124780416488647, + "learning_rate": 1.9940000000000002e-05, + "num_tokens": 6269.0, + "mean_token_accuracy": 0.7778865098953247, + "epoch": 0.009, + "step": 9 + }, + { + "loss": 1.5383, + "grad_norm": 0.9386733174324036, + "learning_rate": 1.9930000000000004e-05, + "num_tokens": 6872.0, + "mean_token_accuracy": 0.7470881938934326, + "epoch": 0.01, + "step": 10 + }, + { + "loss": 1.263, + "grad_norm": 0.8532474040985107, + "learning_rate": 1.9920000000000002e-05, + "num_tokens": 7896.0, + "mean_token_accuracy": 0.7759295701980591, + "epoch": 0.011, + "step": 11 + }, + { + "loss": 1.4861, + "grad_norm": 0.8685364723205566, + "learning_rate": 1.9910000000000004e-05, + "num_tokens": 8499.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.012, + "step": 12 + }, + { + "loss": 1.2178, + "grad_norm": 0.7260677218437195, + "learning_rate": 1.9900000000000003e-05, + "num_tokens": 9102.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.013, + "step": 13 + }, + { + "loss": 1.5306, + "grad_norm": 0.7731572985649109, + "learning_rate": 1.989e-05, + "num_tokens": 9705.0, + "mean_token_accuracy": 0.7487520575523376, + "epoch": 0.014, + "step": 14 + }, + { + "loss": 1.4868, + "grad_norm": 0.8427240252494812, + "learning_rate": 1.9880000000000003e-05, + "num_tokens": 10308.0, + "mean_token_accuracy": 0.7454242706298828, + "epoch": 0.015, + "step": 15 + }, + { + "loss": 1.1892, + "grad_norm": 0.5352721214294434, + "learning_rate": 1.987e-05, + "num_tokens": 11332.0, + "mean_token_accuracy": 0.7827788591384888, + "epoch": 0.016, + "step": 16 + }, + { + "loss": 3.2702, + "grad_norm": 2.2780392169952393, + "learning_rate": 1.9860000000000003e-05, + "num_tokens": 11514.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.017, + "step": 17 + }, + { + "loss": 1.0321, + "grad_norm": 0.4644306004047394, + "learning_rate": 1.985e-05, + "num_tokens": 12538.0, + "mean_token_accuracy": 0.8043052554130554, + "epoch": 0.018, + "step": 18 + }, + { + "loss": 3.235, + "grad_norm": 2.1294195652008057, + "learning_rate": 1.9840000000000003e-05, + "num_tokens": 12720.0, + "mean_token_accuracy": 0.5444444417953491, + "epoch": 0.019, + "step": 19 + }, + { + "loss": 1.4911, + "grad_norm": 0.6255882382392883, + "learning_rate": 1.983e-05, + "num_tokens": 13323.0, + "mean_token_accuracy": 0.7470881938934326, + "epoch": 0.02, + "step": 20 + }, + { + "loss": 0.9522, + "grad_norm": 0.41015884280204773, + "learning_rate": 1.982e-05, + "num_tokens": 14347.0, + "mean_token_accuracy": 0.8170254230499268, + "epoch": 0.021, + "step": 21 + }, + { + "loss": 1.1611, + "grad_norm": 0.5679000616073608, + "learning_rate": 1.9810000000000002e-05, + "num_tokens": 14950.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.022, + "step": 22 + }, + { + "loss": 1.4054, + "grad_norm": 0.5944789052009583, + "learning_rate": 1.98e-05, + "num_tokens": 15553.0, + "mean_token_accuracy": 0.7587354183197021, + "epoch": 0.023, + "step": 23 + }, + { + "loss": 1.1512, + "grad_norm": 0.42472371459007263, + "learning_rate": 1.9790000000000002e-05, + "num_tokens": 16577.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.024, + "step": 24 + }, + { + "loss": 1.3923, + "grad_norm": 0.5697343945503235, + "learning_rate": 1.978e-05, + "num_tokens": 17180.0, + "mean_token_accuracy": 0.7670549154281616, + "epoch": 0.025, + "step": 25 + }, + { + "loss": 0.9853, + "grad_norm": 0.38519924879074097, + "learning_rate": 1.9770000000000002e-05, + "num_tokens": 18204.0, + "mean_token_accuracy": 0.8091976642608643, + "epoch": 0.026, + "step": 26 + }, + { + "loss": 1.4271, + "grad_norm": 0.5397033095359802, + "learning_rate": 1.976e-05, + "num_tokens": 18807.0, + "mean_token_accuracy": 0.7637271285057068, + "epoch": 0.027, + "step": 27 + }, + { + "loss": 3.1053, + "grad_norm": 1.8741865158081055, + "learning_rate": 1.9750000000000002e-05, + "num_tokens": 18989.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.028, + "step": 28 + }, + { + "loss": 1.1496, + "grad_norm": 0.4000399112701416, + "learning_rate": 1.974e-05, + "num_tokens": 20013.0, + "mean_token_accuracy": 0.7876712083816528, + "epoch": 0.029, + "step": 29 + }, + { + "loss": 3.0776, + "grad_norm": 1.8549185991287231, + "learning_rate": 1.9730000000000003e-05, + "num_tokens": 20195.0, + "mean_token_accuracy": 0.5555555820465088, + "epoch": 0.03, + "step": 30 + }, + { + "loss": 1.4506, + "grad_norm": 0.5350305438041687, + "learning_rate": 1.972e-05, + "num_tokens": 20798.0, + "mean_token_accuracy": 0.7470881938934326, + "epoch": 0.031, + "step": 31 + }, + { + "loss": 0.96, + "grad_norm": 0.37083858251571655, + "learning_rate": 1.9710000000000003e-05, + "num_tokens": 21822.0, + "mean_token_accuracy": 0.8180038928985596, + "epoch": 0.032, + "step": 32 + }, + { + "loss": 1.0767, + "grad_norm": 0.38996753096580505, + "learning_rate": 1.97e-05, + "num_tokens": 22846.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.033, + "step": 33 + }, + { + "loss": 3.0208, + "grad_norm": 1.8172383308410645, + "learning_rate": 1.9690000000000003e-05, + "num_tokens": 23028.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.034, + "step": 34 + }, + { + "loss": 1.1118, + "grad_norm": 0.5008355975151062, + "learning_rate": 1.968e-05, + "num_tokens": 23631.0, + "mean_token_accuracy": 0.8086522221565247, + "epoch": 0.035, + "step": 35 + }, + { + "loss": 1.1633, + "grad_norm": 0.5407512187957764, + "learning_rate": 1.9670000000000003e-05, + "num_tokens": 24234.0, + "mean_token_accuracy": 0.7970049977302551, + "epoch": 0.036, + "step": 36 + }, + { + "loss": 0.9154, + "grad_norm": 0.3705298602581024, + "learning_rate": 1.966e-05, + "num_tokens": 25258.0, + "mean_token_accuracy": 0.8209393620491028, + "epoch": 0.037, + "step": 37 + }, + { + "loss": 1.0989, + "grad_norm": 0.37387895584106445, + "learning_rate": 1.9650000000000003e-05, + "num_tokens": 26282.0, + "mean_token_accuracy": 0.790606677532196, + "epoch": 0.038, + "step": 38 + }, + { + "loss": 1.4277, + "grad_norm": 0.5334008932113647, + "learning_rate": 1.9640000000000002e-05, + "num_tokens": 26885.0, + "mean_token_accuracy": 0.760399341583252, + "epoch": 0.039, + "step": 39 + }, + { + "loss": 2.9389, + "grad_norm": 1.7391901016235352, + "learning_rate": 1.9630000000000003e-05, + "num_tokens": 27067.0, + "mean_token_accuracy": 0.5666666626930237, + "epoch": 0.04, + "step": 40 + }, + { + "loss": 1.1188, + "grad_norm": 0.4909788966178894, + "learning_rate": 1.9620000000000002e-05, + "num_tokens": 27670.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.041, + "step": 41 + }, + { + "loss": 0.9534, + "grad_norm": 0.3624725043773651, + "learning_rate": 1.9610000000000004e-05, + "num_tokens": 28694.0, + "mean_token_accuracy": 0.8228963017463684, + "epoch": 0.042, + "step": 42 + }, + { + "loss": 1.0759, + "grad_norm": 0.4780445098876953, + "learning_rate": 1.9600000000000002e-05, + "num_tokens": 29297.0, + "mean_token_accuracy": 0.8053244352340698, + "epoch": 0.043, + "step": 43 + }, + { + "loss": 2.8836, + "grad_norm": 1.6791250705718994, + "learning_rate": 1.9590000000000004e-05, + "num_tokens": 29479.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.044, + "step": 44 + }, + { + "loss": 1.0788, + "grad_norm": 0.3796207010746002, + "learning_rate": 1.9580000000000002e-05, + "num_tokens": 30503.0, + "mean_token_accuracy": 0.7945205569267273, + "epoch": 0.045, + "step": 45 + }, + { + "loss": 1.3712, + "grad_norm": 0.5122112035751343, + "learning_rate": 1.957e-05, + "num_tokens": 31106.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.046, + "step": 46 + }, + { + "loss": 1.0591, + "grad_norm": 0.460268497467041, + "learning_rate": 1.9560000000000002e-05, + "num_tokens": 31709.0, + "mean_token_accuracy": 0.8069883584976196, + "epoch": 0.047, + "step": 47 + }, + { + "loss": 1.3361, + "grad_norm": 0.522340714931488, + "learning_rate": 1.955e-05, + "num_tokens": 32312.0, + "mean_token_accuracy": 0.7653909921646118, + "epoch": 0.048, + "step": 48 + }, + { + "loss": 1.044, + "grad_norm": 0.4595264792442322, + "learning_rate": 1.9540000000000003e-05, + "num_tokens": 32915.0, + "mean_token_accuracy": 0.8153077960014343, + "epoch": 0.049, + "step": 49 + }, + { + "loss": 2.801, + "grad_norm": 1.6471343040466309, + "learning_rate": 1.953e-05, + "num_tokens": 33097.0, + "mean_token_accuracy": 0.5777778029441833, + "epoch": 0.05, + "step": 50 + }, + { + "loss": 1.0425, + "grad_norm": 0.45320287346839905, + "learning_rate": 1.9520000000000003e-05, + "num_tokens": 33700.0, + "mean_token_accuracy": 0.8119800090789795, + "epoch": 0.051, + "step": 51 + }, + { + "loss": 0.9233, + "grad_norm": 0.3386388123035431, + "learning_rate": 1.951e-05, + "num_tokens": 34724.0, + "mean_token_accuracy": 0.816046953201294, + "epoch": 0.052, + "step": 52 + }, + { + "loss": 1.0603, + "grad_norm": 0.3830195367336273, + "learning_rate": 1.95e-05, + "num_tokens": 35748.0, + "mean_token_accuracy": 0.7935420870780945, + "epoch": 0.053, + "step": 53 + }, + { + "loss": 1.3035, + "grad_norm": 0.48781096935272217, + "learning_rate": 1.949e-05, + "num_tokens": 36351.0, + "mean_token_accuracy": 0.760399341583252, + "epoch": 0.054, + "step": 54 + }, + { + "loss": 0.7661, + "grad_norm": 0.32136020064353943, + "learning_rate": 1.948e-05, + "num_tokens": 37375.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.055, + "step": 55 + }, + { + "loss": 1.0288, + "grad_norm": 0.47111162543296814, + "learning_rate": 1.947e-05, + "num_tokens": 37978.0, + "mean_token_accuracy": 0.8086522221565247, + "epoch": 0.056, + "step": 56 + }, + { + "loss": 0.9022, + "grad_norm": 0.3371954560279846, + "learning_rate": 1.946e-05, + "num_tokens": 39002.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.057, + "step": 57 + }, + { + "loss": 1.3471, + "grad_norm": 0.493735134601593, + "learning_rate": 1.9450000000000002e-05, + "num_tokens": 39605.0, + "mean_token_accuracy": 0.7570715546607971, + "epoch": 0.058, + "step": 58 + }, + { + "loss": 2.6835, + "grad_norm": 1.6889381408691406, + "learning_rate": 1.944e-05, + "num_tokens": 39787.0, + "mean_token_accuracy": 0.5833333134651184, + "epoch": 0.059, + "step": 59 + }, + { + "loss": 1.0389, + "grad_norm": 0.46930453181266785, + "learning_rate": 1.9430000000000002e-05, + "num_tokens": 40390.0, + "mean_token_accuracy": 0.8136439323425293, + "epoch": 0.06, + "step": 60 + }, + { + "loss": 0.8618, + "grad_norm": 0.3517741560935974, + "learning_rate": 1.942e-05, + "num_tokens": 41414.0, + "mean_token_accuracy": 0.8287671208381653, + "epoch": 0.061, + "step": 61 + }, + { + "loss": 1.0166, + "grad_norm": 0.36366671323776245, + "learning_rate": 1.9410000000000002e-05, + "num_tokens": 42438.0, + "mean_token_accuracy": 0.8062622547149658, + "epoch": 0.062, + "step": 62 + }, + { + "loss": 0.7078, + "grad_norm": 0.3396281599998474, + "learning_rate": 1.94e-05, + "num_tokens": 43462.0, + "mean_token_accuracy": 0.854207456111908, + "epoch": 0.063, + "step": 63 + }, + { + "loss": 1.0209, + "grad_norm": 0.45759913325309753, + "learning_rate": 1.9390000000000002e-05, + "num_tokens": 44065.0, + "mean_token_accuracy": 0.8053244352340698, + "epoch": 0.064, + "step": 64 + }, + { + "loss": 1.2182, + "grad_norm": 0.5087379813194275, + "learning_rate": 1.938e-05, + "num_tokens": 44668.0, + "mean_token_accuracy": 0.7720465660095215, + "epoch": 0.065, + "step": 65 + }, + { + "loss": 1.2071, + "grad_norm": 0.47915199398994446, + "learning_rate": 1.9370000000000003e-05, + "num_tokens": 45271.0, + "mean_token_accuracy": 0.7753743529319763, + "epoch": 0.066, + "step": 66 + }, + { + "loss": 2.5826, + "grad_norm": 1.750019907951355, + "learning_rate": 1.936e-05, + "num_tokens": 45453.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.067, + "step": 67 + }, + { + "loss": 1.2427, + "grad_norm": 0.4957088232040405, + "learning_rate": 1.9350000000000003e-05, + "num_tokens": 46056.0, + "mean_token_accuracy": 0.7703827023506165, + "epoch": 0.068, + "step": 68 + }, + { + "loss": 1.0424, + "grad_norm": 0.41490304470062256, + "learning_rate": 1.934e-05, + "num_tokens": 47080.0, + "mean_token_accuracy": 0.7896282076835632, + "epoch": 0.069, + "step": 69 + }, + { + "loss": 0.9686, + "grad_norm": 0.46192672848701477, + "learning_rate": 1.9330000000000003e-05, + "num_tokens": 47683.0, + "mean_token_accuracy": 0.8169717192649841, + "epoch": 0.07, + "step": 70 + }, + { + "loss": 0.8245, + "grad_norm": 0.35540422797203064, + "learning_rate": 1.932e-05, + "num_tokens": 48707.0, + "mean_token_accuracy": 0.8307240605354309, + "epoch": 0.071, + "step": 71 + }, + { + "loss": 2.5112, + "grad_norm": 1.8079156875610352, + "learning_rate": 1.9310000000000003e-05, + "num_tokens": 48889.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.072, + "step": 72 + }, + { + "loss": 2.4944, + "grad_norm": 1.8286060094833374, + "learning_rate": 1.93e-05, + "num_tokens": 49071.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.073, + "step": 73 + }, + { + "loss": 2.4756, + "grad_norm": 1.8400400876998901, + "learning_rate": 1.9290000000000003e-05, + "num_tokens": 49253.0, + "mean_token_accuracy": 0.5888888835906982, + "epoch": 0.074, + "step": 74 + }, + { + "loss": 1.2609, + "grad_norm": 0.5120524764060974, + "learning_rate": 1.9280000000000002e-05, + "num_tokens": 49856.0, + "mean_token_accuracy": 0.7737104892730713, + "epoch": 0.075, + "step": 75 + }, + { + "loss": 2.4372, + "grad_norm": 1.8490524291992188, + "learning_rate": 1.9270000000000004e-05, + "num_tokens": 50038.0, + "mean_token_accuracy": 0.6000000238418579, + "epoch": 0.076, + "step": 76 + }, + { + "loss": 0.87, + "grad_norm": 0.35692137479782104, + "learning_rate": 1.9260000000000002e-05, + "num_tokens": 51062.0, + "mean_token_accuracy": 0.8268101811408997, + "epoch": 0.077, + "step": 77 + }, + { + "loss": 2.3976, + "grad_norm": 1.857652187347412, + "learning_rate": 1.925e-05, + "num_tokens": 51244.0, + "mean_token_accuracy": 0.6111111044883728, + "epoch": 0.078, + "step": 78 + }, + { + "loss": 0.8421, + "grad_norm": 0.384198397397995, + "learning_rate": 1.9240000000000002e-05, + "num_tokens": 52268.0, + "mean_token_accuracy": 0.8326810002326965, + "epoch": 0.079, + "step": 79 + }, + { + "loss": 0.6936, + "grad_norm": 0.3182176947593689, + "learning_rate": 1.923e-05, + "num_tokens": 53292.0, + "mean_token_accuracy": 0.8639921545982361, + "epoch": 0.08, + "step": 80 + }, + { + "loss": 1.0199, + "grad_norm": 0.44241663813591003, + "learning_rate": 1.9220000000000002e-05, + "num_tokens": 54316.0, + "mean_token_accuracy": 0.8082191944122314, + "epoch": 0.081, + "step": 81 + }, + { + "loss": 2.3246, + "grad_norm": 1.8165708780288696, + "learning_rate": 1.921e-05, + "num_tokens": 54498.0, + "mean_token_accuracy": 0.6222222447395325, + "epoch": 0.082, + "step": 82 + }, + { + "loss": 1.0166, + "grad_norm": 0.4384869635105133, + "learning_rate": 1.9200000000000003e-05, + "num_tokens": 55522.0, + "mean_token_accuracy": 0.7994129061698914, + "epoch": 0.083, + "step": 83 + }, + { + "loss": 1.165, + "grad_norm": 0.5062429308891296, + "learning_rate": 1.919e-05, + "num_tokens": 56125.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.084, + "step": 84 + }, + { + "loss": 0.8415, + "grad_norm": 0.3699897527694702, + "learning_rate": 1.918e-05, + "num_tokens": 57149.0, + "mean_token_accuracy": 0.8277886509895325, + "epoch": 0.085, + "step": 85 + }, + { + "loss": 2.2615, + "grad_norm": 1.7989789247512817, + "learning_rate": 1.917e-05, + "num_tokens": 57331.0, + "mean_token_accuracy": 0.6333333253860474, + "epoch": 0.086, + "step": 86 + }, + { + "loss": 1.1214, + "grad_norm": 0.4981077313423157, + "learning_rate": 1.916e-05, + "num_tokens": 57934.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.087, + "step": 87 + }, + { + "loss": 0.9395, + "grad_norm": 0.4391534626483917, + "learning_rate": 1.915e-05, + "num_tokens": 58958.0, + "mean_token_accuracy": 0.8131115436553955, + "epoch": 0.088, + "step": 88 + }, + { + "loss": 0.7869, + "grad_norm": 0.4100501537322998, + "learning_rate": 1.914e-05, + "num_tokens": 59982.0, + "mean_token_accuracy": 0.8434442281723022, + "epoch": 0.089, + "step": 89 + }, + { + "loss": 1.1777, + "grad_norm": 0.515848696231842, + "learning_rate": 1.913e-05, + "num_tokens": 60585.0, + "mean_token_accuracy": 0.7787021398544312, + "epoch": 0.09, + "step": 90 + }, + { + "loss": 1.1895, + "grad_norm": 0.5122319459915161, + "learning_rate": 1.912e-05, + "num_tokens": 61188.0, + "mean_token_accuracy": 0.782029926776886, + "epoch": 0.091, + "step": 91 + }, + { + "loss": 0.8746, + "grad_norm": 0.436844140291214, + "learning_rate": 1.911e-05, + "num_tokens": 61791.0, + "mean_token_accuracy": 0.8302828669548035, + "epoch": 0.092, + "step": 92 + }, + { + "loss": 1.1634, + "grad_norm": 0.5078467130661011, + "learning_rate": 1.91e-05, + "num_tokens": 62394.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.093, + "step": 93 + }, + { + "loss": 0.9594, + "grad_norm": 0.4935344159603119, + "learning_rate": 1.9090000000000002e-05, + "num_tokens": 63418.0, + "mean_token_accuracy": 0.8121330738067627, + "epoch": 0.094, + "step": 94 + }, + { + "loss": 1.1431, + "grad_norm": 0.5384430289268494, + "learning_rate": 1.908e-05, + "num_tokens": 64021.0, + "mean_token_accuracy": 0.7770382761955261, + "epoch": 0.095, + "step": 95 + }, + { + "loss": 1.0983, + "grad_norm": 0.5433980226516724, + "learning_rate": 1.9070000000000002e-05, + "num_tokens": 64624.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.096, + "step": 96 + }, + { + "loss": 1.0644, + "grad_norm": 0.5404391884803772, + "learning_rate": 1.906e-05, + "num_tokens": 65227.0, + "mean_token_accuracy": 0.7886855006217957, + "epoch": 0.097, + "step": 97 + }, + { + "loss": 1.1442, + "grad_norm": 0.5509842038154602, + "learning_rate": 1.9050000000000002e-05, + "num_tokens": 65830.0, + "mean_token_accuracy": 0.7886855006217957, + "epoch": 0.098, + "step": 98 + }, + { + "loss": 1.131, + "grad_norm": 0.5534968972206116, + "learning_rate": 1.904e-05, + "num_tokens": 66433.0, + "mean_token_accuracy": 0.7853577136993408, + "epoch": 0.099, + "step": 99 + }, + { + "loss": 0.9655, + "grad_norm": 0.4929925501346588, + "learning_rate": 1.9030000000000002e-05, + "num_tokens": 67457.0, + "mean_token_accuracy": 0.805283784866333, + "epoch": 0.1, + "step": 100 + }, + { + "loss": 1.1075, + "grad_norm": 0.5677370429039001, + "learning_rate": 1.902e-05, + "num_tokens": 68060.0, + "mean_token_accuracy": 0.7870216369628906, + "epoch": 0.101, + "step": 101 + }, + { + "loss": 0.7954, + "grad_norm": 0.43329960107803345, + "learning_rate": 1.9010000000000003e-05, + "num_tokens": 69084.0, + "mean_token_accuracy": 0.8405088186264038, + "epoch": 0.102, + "step": 102 + }, + { + "loss": 0.9016, + "grad_norm": 0.5032463669776917, + "learning_rate": 1.9e-05, + "num_tokens": 70108.0, + "mean_token_accuracy": 0.8199608325958252, + "epoch": 0.103, + "step": 103 + }, + { + "loss": 0.7721, + "grad_norm": 0.40760254859924316, + "learning_rate": 1.8990000000000003e-05, + "num_tokens": 71132.0, + "mean_token_accuracy": 0.839530348777771, + "epoch": 0.104, + "step": 104 + }, + { + "loss": 0.9044, + "grad_norm": 0.45296505093574524, + "learning_rate": 1.898e-05, + "num_tokens": 72156.0, + "mean_token_accuracy": 0.8189823627471924, + "epoch": 0.105, + "step": 105 + }, + { + "loss": 0.8039, + "grad_norm": 0.523140549659729, + "learning_rate": 1.8970000000000003e-05, + "num_tokens": 72759.0, + "mean_token_accuracy": 0.841930091381073, + "epoch": 0.106, + "step": 106 + }, + { + "loss": 1.0876, + "grad_norm": 0.6097339391708374, + "learning_rate": 1.896e-05, + "num_tokens": 73362.0, + "mean_token_accuracy": 0.7936772108078003, + "epoch": 0.107, + "step": 107 + }, + { + "loss": 1.0691, + "grad_norm": 0.6268714666366577, + "learning_rate": 1.8950000000000003e-05, + "num_tokens": 73965.0, + "mean_token_accuracy": 0.7903494238853455, + "epoch": 0.108, + "step": 108 + }, + { + "loss": 0.8107, + "grad_norm": 0.5590832829475403, + "learning_rate": 1.894e-05, + "num_tokens": 74568.0, + "mean_token_accuracy": 0.840266227722168, + "epoch": 0.109, + "step": 109 + }, + { + "loss": 1.9547, + "grad_norm": 2.607954978942871, + "learning_rate": 1.893e-05, + "num_tokens": 74750.0, + "mean_token_accuracy": 0.6555555462837219, + "epoch": 0.11, + "step": 110 + }, + { + "loss": 1.0032, + "grad_norm": 0.6220319271087646, + "learning_rate": 1.8920000000000002e-05, + "num_tokens": 75353.0, + "mean_token_accuracy": 0.8053244352340698, + "epoch": 0.111, + "step": 111 + }, + { + "loss": 1.0205, + "grad_norm": 0.6377025842666626, + "learning_rate": 1.891e-05, + "num_tokens": 75956.0, + "mean_token_accuracy": 0.80033278465271, + "epoch": 0.112, + "step": 112 + }, + { + "loss": 1.0413, + "grad_norm": 0.6643140912055969, + "learning_rate": 1.8900000000000002e-05, + "num_tokens": 76559.0, + "mean_token_accuracy": 0.7953410744667053, + "epoch": 0.113, + "step": 113 + }, + { + "loss": 1.0232, + "grad_norm": 0.6345243453979492, + "learning_rate": 1.889e-05, + "num_tokens": 77162.0, + "mean_token_accuracy": 0.8036605715751648, + "epoch": 0.114, + "step": 114 + }, + { + "loss": 1.8587, + "grad_norm": 2.7318179607391357, + "learning_rate": 1.8880000000000002e-05, + "num_tokens": 77344.0, + "mean_token_accuracy": 0.6666666865348816, + "epoch": 0.115, + "step": 115 + }, + { + "loss": 0.7584, + "grad_norm": 0.5891063809394836, + "learning_rate": 1.887e-05, + "num_tokens": 77947.0, + "mean_token_accuracy": 0.8502495884895325, + "epoch": 0.116, + "step": 116 + }, + { + "loss": 0.7495, + "grad_norm": 0.62372887134552, + "learning_rate": 1.886e-05, + "num_tokens": 78550.0, + "mean_token_accuracy": 0.8469218015670776, + "epoch": 0.117, + "step": 117 + }, + { + "loss": 0.7327, + "grad_norm": 0.4757370948791504, + "learning_rate": 1.885e-05, + "num_tokens": 79574.0, + "mean_token_accuracy": 0.8473581075668335, + "epoch": 0.118, + "step": 118 + }, + { + "loss": 1.0126, + "grad_norm": 0.6939040422439575, + "learning_rate": 1.884e-05, + "num_tokens": 80177.0, + "mean_token_accuracy": 0.8069883584976196, + "epoch": 0.119, + "step": 119 + }, + { + "loss": 1.7444, + "grad_norm": 2.786555290222168, + "learning_rate": 1.883e-05, + "num_tokens": 80359.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.12, + "step": 120 + }, + { + "loss": 0.7121, + "grad_norm": 0.5502288341522217, + "learning_rate": 1.882e-05, + "num_tokens": 81383.0, + "mean_token_accuracy": 0.8512719869613647, + "epoch": 0.121, + "step": 121 + }, + { + "loss": 0.6055, + "grad_norm": 0.6514042019844055, + "learning_rate": 1.881e-05, + "num_tokens": 82407.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.122, + "step": 122 + }, + { + "loss": 0.7074, + "grad_norm": 0.6278131008148193, + "learning_rate": 1.88e-05, + "num_tokens": 83010.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.123, + "step": 123 + }, + { + "loss": 0.9056, + "grad_norm": 0.7105093002319336, + "learning_rate": 1.879e-05, + "num_tokens": 83613.0, + "mean_token_accuracy": 0.8103161454200745, + "epoch": 0.124, + "step": 124 + }, + { + "loss": 0.7111, + "grad_norm": 0.5671331286430359, + "learning_rate": 1.878e-05, + "num_tokens": 84637.0, + "mean_token_accuracy": 0.8454011678695679, + "epoch": 0.125, + "step": 125 + }, + { + "loss": 1.6124, + "grad_norm": 2.8393170833587646, + "learning_rate": 1.877e-05, + "num_tokens": 84819.0, + "mean_token_accuracy": 0.699999988079071, + "epoch": 0.126, + "step": 126 + }, + { + "loss": 0.6913, + "grad_norm": 0.6492026448249817, + "learning_rate": 1.876e-05, + "num_tokens": 85422.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.127, + "step": 127 + }, + { + "loss": 0.9506, + "grad_norm": 0.8479906916618347, + "learning_rate": 1.8750000000000002e-05, + "num_tokens": 86025.0, + "mean_token_accuracy": 0.7986688613891602, + "epoch": 0.128, + "step": 128 + }, + { + "loss": 0.7724, + "grad_norm": 0.6733057498931885, + "learning_rate": 1.8740000000000004e-05, + "num_tokens": 87049.0, + "mean_token_accuracy": 0.8365949392318726, + "epoch": 0.129, + "step": 129 + }, + { + "loss": 0.7141, + "grad_norm": 0.7287142872810364, + "learning_rate": 1.8730000000000002e-05, + "num_tokens": 87652.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.13, + "step": 130 + }, + { + "loss": 1.4981, + "grad_norm": 3.1733977794647217, + "learning_rate": 1.8720000000000004e-05, + "num_tokens": 87834.0, + "mean_token_accuracy": 0.7222222089767456, + "epoch": 0.131, + "step": 131 + }, + { + "loss": 0.7416, + "grad_norm": 0.7018607258796692, + "learning_rate": 1.8710000000000002e-05, + "num_tokens": 88858.0, + "mean_token_accuracy": 0.8385518789291382, + "epoch": 0.132, + "step": 132 + }, + { + "loss": 0.6695, + "grad_norm": 0.569635272026062, + "learning_rate": 1.8700000000000004e-05, + "num_tokens": 89882.0, + "mean_token_accuracy": 0.8581213355064392, + "epoch": 0.133, + "step": 133 + }, + { + "loss": 0.8634, + "grad_norm": 0.92866051197052, + "learning_rate": 1.8690000000000002e-05, + "num_tokens": 90485.0, + "mean_token_accuracy": 0.8169717192649841, + "epoch": 0.134, + "step": 134 + }, + { + "loss": 0.6584, + "grad_norm": 0.6502605080604553, + "learning_rate": 1.8680000000000004e-05, + "num_tokens": 91509.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.135, + "step": 135 + }, + { + "loss": 0.6392, + "grad_norm": 0.826318085193634, + "learning_rate": 1.8670000000000003e-05, + "num_tokens": 92112.0, + "mean_token_accuracy": 0.8652245998382568, + "epoch": 0.136, + "step": 136 + }, + { + "loss": 0.4802, + "grad_norm": 0.5766599774360657, + "learning_rate": 1.866e-05, + "num_tokens": 93136.0, + "mean_token_accuracy": 0.8953033089637756, + "epoch": 0.137, + "step": 137 + }, + { + "loss": 0.6821, + "grad_norm": 0.8077890276908875, + "learning_rate": 1.8650000000000003e-05, + "num_tokens": 93739.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.138, + "step": 138 + }, + { + "loss": 0.8336, + "grad_norm": 0.9565444588661194, + "learning_rate": 1.864e-05, + "num_tokens": 94342.0, + "mean_token_accuracy": 0.820299506187439, + "epoch": 0.139, + "step": 139 + }, + { + "loss": 0.6176, + "grad_norm": 0.6447359919548035, + "learning_rate": 1.8630000000000003e-05, + "num_tokens": 95366.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.14, + "step": 140 + }, + { + "loss": 0.7278, + "grad_norm": 0.7473644614219666, + "learning_rate": 1.862e-05, + "num_tokens": 96390.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.141, + "step": 141 + }, + { + "loss": 0.582, + "grad_norm": 0.8362826704978943, + "learning_rate": 1.8610000000000003e-05, + "num_tokens": 96993.0, + "mean_token_accuracy": 0.8785357475280762, + "epoch": 0.142, + "step": 142 + }, + { + "loss": 1.202, + "grad_norm": 4.45956563949585, + "learning_rate": 1.86e-05, + "num_tokens": 97175.0, + "mean_token_accuracy": 0.7333333492279053, + "epoch": 0.143, + "step": 143 + }, + { + "loss": 0.7112, + "grad_norm": 0.8263697624206543, + "learning_rate": 1.859e-05, + "num_tokens": 98199.0, + "mean_token_accuracy": 0.8463796377182007, + "epoch": 0.144, + "step": 144 + }, + { + "loss": 0.6413, + "grad_norm": 1.0524468421936035, + "learning_rate": 1.858e-05, + "num_tokens": 98802.0, + "mean_token_accuracy": 0.860232949256897, + "epoch": 0.145, + "step": 145 + }, + { + "loss": 0.7817, + "grad_norm": 1.0738270282745361, + "learning_rate": 1.857e-05, + "num_tokens": 99405.0, + "mean_token_accuracy": 0.8269550800323486, + "epoch": 0.146, + "step": 146 + }, + { + "loss": 0.7235, + "grad_norm": 1.2545086145401, + "learning_rate": 1.8560000000000002e-05, + "num_tokens": 100429.0, + "mean_token_accuracy": 0.8414872884750366, + "epoch": 0.147, + "step": 147 + }, + { + "loss": 0.5966, + "grad_norm": 0.8518689274787903, + "learning_rate": 1.855e-05, + "num_tokens": 101453.0, + "mean_token_accuracy": 0.8708415031433105, + "epoch": 0.148, + "step": 148 + }, + { + "loss": 0.6405, + "grad_norm": 0.8886847496032715, + "learning_rate": 1.8540000000000002e-05, + "num_tokens": 102477.0, + "mean_token_accuracy": 0.859099805355072, + "epoch": 0.149, + "step": 149 + }, + { + "loss": 0.5327, + "grad_norm": 0.8927612900733948, + "learning_rate": 1.853e-05, + "num_tokens": 103501.0, + "mean_token_accuracy": 0.8864970803260803, + "epoch": 0.15, + "step": 150 + }, + { + "loss": 0.6202, + "grad_norm": 0.9321349263191223, + "learning_rate": 1.8520000000000002e-05, + "num_tokens": 104525.0, + "mean_token_accuracy": 0.8630136847496033, + "epoch": 0.151, + "step": 151 + }, + { + "loss": 0.6459, + "grad_norm": 1.0996044874191284, + "learning_rate": 1.851e-05, + "num_tokens": 105549.0, + "mean_token_accuracy": 0.8600782752037048, + "epoch": 0.152, + "step": 152 + }, + { + "loss": 0.6313, + "grad_norm": 0.942244291305542, + "learning_rate": 1.8500000000000002e-05, + "num_tokens": 106573.0, + "mean_token_accuracy": 0.8639921545982361, + "epoch": 0.153, + "step": 153 + }, + { + "loss": 0.5416, + "grad_norm": 0.8150050640106201, + "learning_rate": 1.849e-05, + "num_tokens": 107597.0, + "mean_token_accuracy": 0.8757338523864746, + "epoch": 0.154, + "step": 154 + }, + { + "loss": 0.9382, + "grad_norm": 5.082424163818359, + "learning_rate": 1.8480000000000003e-05, + "num_tokens": 107779.0, + "mean_token_accuracy": 0.7777777910232544, + "epoch": 0.155, + "step": 155 + }, + { + "loss": 0.6434, + "grad_norm": 1.4283632040023804, + "learning_rate": 1.847e-05, + "num_tokens": 108382.0, + "mean_token_accuracy": 0.8519134521484375, + "epoch": 0.156, + "step": 156 + }, + { + "loss": 0.6736, + "grad_norm": 1.4088659286499023, + "learning_rate": 1.8460000000000003e-05, + "num_tokens": 108985.0, + "mean_token_accuracy": 0.8552412390708923, + "epoch": 0.157, + "step": 157 + }, + { + "loss": 0.872, + "grad_norm": 4.658277988433838, + "learning_rate": 1.845e-05, + "num_tokens": 109167.0, + "mean_token_accuracy": 0.800000011920929, + "epoch": 0.158, + "step": 158 + }, + { + "loss": 0.6061, + "grad_norm": 1.0742665529251099, + "learning_rate": 1.8440000000000003e-05, + "num_tokens": 110191.0, + "mean_token_accuracy": 0.8620352149009705, + "epoch": 0.159, + "step": 159 + }, + { + "loss": 0.639, + "grad_norm": 1.259716272354126, + "learning_rate": 1.843e-05, + "num_tokens": 110794.0, + "mean_token_accuracy": 0.8535773754119873, + "epoch": 0.16, + "step": 160 + }, + { + "loss": 0.6228, + "grad_norm": 1.1735901832580566, + "learning_rate": 1.8420000000000003e-05, + "num_tokens": 111397.0, + "mean_token_accuracy": 0.8635607361793518, + "epoch": 0.161, + "step": 161 + }, + { + "loss": 0.4876, + "grad_norm": 0.9384316802024841, + "learning_rate": 1.841e-05, + "num_tokens": 112421.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.162, + "step": 162 + }, + { + "loss": 0.5318, + "grad_norm": 0.9066665172576904, + "learning_rate": 1.8400000000000003e-05, + "num_tokens": 113445.0, + "mean_token_accuracy": 0.8904109597206116, + "epoch": 0.163, + "step": 163 + }, + { + "loss": 0.5869, + "grad_norm": 1.2560738325119019, + "learning_rate": 1.8390000000000002e-05, + "num_tokens": 114469.0, + "mean_token_accuracy": 0.8688845634460449, + "epoch": 0.164, + "step": 164 + }, + { + "loss": 0.5481, + "grad_norm": 1.3613413572311401, + "learning_rate": 1.8380000000000004e-05, + "num_tokens": 115493.0, + "mean_token_accuracy": 0.8767123222351074, + "epoch": 0.165, + "step": 165 + }, + { + "loss": 0.5731, + "grad_norm": 1.4810606241226196, + "learning_rate": 1.8370000000000002e-05, + "num_tokens": 116096.0, + "mean_token_accuracy": 0.8752079606056213, + "epoch": 0.166, + "step": 166 + }, + { + "loss": 0.3885, + "grad_norm": 0.9610773324966431, + "learning_rate": 1.8360000000000004e-05, + "num_tokens": 117120.0, + "mean_token_accuracy": 0.9109588861465454, + "epoch": 0.167, + "step": 167 + }, + { + "loss": 0.6274, + "grad_norm": 4.352345943450928, + "learning_rate": 1.8350000000000002e-05, + "num_tokens": 117302.0, + "mean_token_accuracy": 0.8666666746139526, + "epoch": 0.168, + "step": 168 + }, + { + "loss": 0.4967, + "grad_norm": 1.632398009300232, + "learning_rate": 1.834e-05, + "num_tokens": 117905.0, + "mean_token_accuracy": 0.8768718838691711, + "epoch": 0.169, + "step": 169 + }, + { + "loss": 0.4694, + "grad_norm": 1.3380522727966309, + "learning_rate": 1.8330000000000002e-05, + "num_tokens": 118929.0, + "mean_token_accuracy": 0.8943248391151428, + "epoch": 0.17, + "step": 170 + }, + { + "loss": 0.4756, + "grad_norm": 1.4122637510299683, + "learning_rate": 1.832e-05, + "num_tokens": 119953.0, + "mean_token_accuracy": 0.8913894295692444, + "epoch": 0.171, + "step": 171 + }, + { + "loss": 0.5351, + "grad_norm": 4.495899677276611, + "learning_rate": 1.8310000000000003e-05, + "num_tokens": 120135.0, + "mean_token_accuracy": 0.8833333253860474, + "epoch": 0.172, + "step": 172 + }, + { + "loss": 0.5104, + "grad_norm": 4.362597465515137, + "learning_rate": 1.83e-05, + "num_tokens": 120317.0, + "mean_token_accuracy": 0.8888888955116272, + "epoch": 0.173, + "step": 173 + }, + { + "loss": 0.3742, + "grad_norm": 1.0410066843032837, + "learning_rate": 1.8290000000000003e-05, + "num_tokens": 121341.0, + "mean_token_accuracy": 0.9109588861465454, + "epoch": 0.174, + "step": 174 + }, + { + "loss": 0.3873, + "grad_norm": 1.1270015239715576, + "learning_rate": 1.828e-05, + "num_tokens": 122365.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.175, + "step": 175 + }, + { + "loss": 0.4169, + "grad_norm": 1.3939638137817383, + "learning_rate": 1.827e-05, + "num_tokens": 122968.0, + "mean_token_accuracy": 0.9034941792488098, + "epoch": 0.176, + "step": 176 + }, + { + "loss": 0.3195, + "grad_norm": 1.4632936716079712, + "learning_rate": 1.826e-05, + "num_tokens": 123571.0, + "mean_token_accuracy": 0.9284525513648987, + "epoch": 0.177, + "step": 177 + }, + { + "loss": 0.4051, + "grad_norm": 4.38023042678833, + "learning_rate": 1.825e-05, + "num_tokens": 123753.0, + "mean_token_accuracy": 0.9333333373069763, + "epoch": 0.178, + "step": 178 + }, + { + "loss": 0.3713, + "grad_norm": 1.5698707103729248, + "learning_rate": 1.824e-05, + "num_tokens": 124356.0, + "mean_token_accuracy": 0.9134775400161743, + "epoch": 0.179, + "step": 179 + }, + { + "loss": 0.3905, + "grad_norm": 1.4007678031921387, + "learning_rate": 1.823e-05, + "num_tokens": 125380.0, + "mean_token_accuracy": 0.9060665369033813, + "epoch": 0.18, + "step": 180 + }, + { + "loss": 0.417, + "grad_norm": 1.6752204895019531, + "learning_rate": 1.8220000000000002e-05, + "num_tokens": 125983.0, + "mean_token_accuracy": 0.9084858298301697, + "epoch": 0.181, + "step": 181 + }, + { + "loss": 0.2957, + "grad_norm": 2.0979738235473633, + "learning_rate": 1.821e-05, + "num_tokens": 126586.0, + "mean_token_accuracy": 0.9334442615509033, + "epoch": 0.182, + "step": 182 + }, + { + "loss": 0.339, + "grad_norm": 5.0233154296875, + "learning_rate": 1.8200000000000002e-05, + "num_tokens": 126768.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.183, + "step": 183 + }, + { + "loss": 0.3281, + "grad_norm": 4.591806888580322, + "learning_rate": 1.819e-05, + "num_tokens": 126950.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.184, + "step": 184 + }, + { + "loss": 0.377, + "grad_norm": 1.4888513088226318, + "learning_rate": 1.8180000000000002e-05, + "num_tokens": 127974.0, + "mean_token_accuracy": 0.908023476600647, + "epoch": 0.185, + "step": 185 + }, + { + "loss": 0.3416, + "grad_norm": 1.5393342971801758, + "learning_rate": 1.817e-05, + "num_tokens": 128998.0, + "mean_token_accuracy": 0.9207436442375183, + "epoch": 0.186, + "step": 186 + }, + { + "loss": 0.35, + "grad_norm": 1.4663900136947632, + "learning_rate": 1.8160000000000002e-05, + "num_tokens": 129601.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.187, + "step": 187 + }, + { + "loss": 0.3328, + "grad_norm": 1.522277593612671, + "learning_rate": 1.815e-05, + "num_tokens": 130204.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.188, + "step": 188 + }, + { + "loss": 0.2824, + "grad_norm": 2.468599319458008, + "learning_rate": 1.8140000000000003e-05, + "num_tokens": 130386.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.189, + "step": 189 + }, + { + "loss": 0.2709, + "grad_norm": 2.1798818111419678, + "learning_rate": 1.813e-05, + "num_tokens": 130568.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.19, + "step": 190 + }, + { + "loss": 0.3626, + "grad_norm": 1.752602219581604, + "learning_rate": 1.8120000000000003e-05, + "num_tokens": 131592.0, + "mean_token_accuracy": 0.9197651743888855, + "epoch": 0.191, + "step": 191 + }, + { + "loss": 0.241, + "grad_norm": 0.9363252520561218, + "learning_rate": 1.811e-05, + "num_tokens": 132195.0, + "mean_token_accuracy": 0.9484192728996277, + "epoch": 0.192, + "step": 192 + }, + { + "loss": 0.2818, + "grad_norm": 1.2946171760559082, + "learning_rate": 1.8100000000000003e-05, + "num_tokens": 133219.0, + "mean_token_accuracy": 0.9344422817230225, + "epoch": 0.193, + "step": 193 + }, + { + "loss": 0.2998, + "grad_norm": 1.081048846244812, + "learning_rate": 1.809e-05, + "num_tokens": 134243.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.194, + "step": 194 + }, + { + "loss": 0.2823, + "grad_norm": 0.9526715278625488, + "learning_rate": 1.8080000000000003e-05, + "num_tokens": 135267.0, + "mean_token_accuracy": 0.9285714030265808, + "epoch": 0.195, + "step": 195 + }, + { + "loss": 0.2427, + "grad_norm": 3.766998052597046, + "learning_rate": 1.807e-05, + "num_tokens": 135449.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.196, + "step": 196 + }, + { + "loss": 0.3572, + "grad_norm": 1.496860146522522, + "learning_rate": 1.8060000000000003e-05, + "num_tokens": 136052.0, + "mean_token_accuracy": 0.921796977519989, + "epoch": 0.197, + "step": 197 + }, + { + "loss": 0.2906, + "grad_norm": 1.5144256353378296, + "learning_rate": 1.805e-05, + "num_tokens": 137076.0, + "mean_token_accuracy": 0.9344422817230225, + "epoch": 0.198, + "step": 198 + }, + { + "loss": 0.2936, + "grad_norm": 1.2776437997817993, + "learning_rate": 1.8040000000000003e-05, + "num_tokens": 138100.0, + "mean_token_accuracy": 0.9344422817230225, + "epoch": 0.199, + "step": 199 + }, + { + "loss": 0.2886, + "grad_norm": 1.6185836791992188, + "learning_rate": 1.8030000000000002e-05, + "num_tokens": 138703.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.2, + "step": 200 + }, + { + "loss": 0.341, + "grad_norm": 1.9566179513931274, + "learning_rate": 1.802e-05, + "num_tokens": 139306.0, + "mean_token_accuracy": 0.9267886877059937, + "epoch": 0.201, + "step": 201 + }, + { + "loss": 0.3243, + "grad_norm": 1.490872859954834, + "learning_rate": 1.8010000000000002e-05, + "num_tokens": 140330.0, + "mean_token_accuracy": 0.9285714030265808, + "epoch": 0.202, + "step": 202 + }, + { + "loss": 0.2863, + "grad_norm": 1.5277602672576904, + "learning_rate": 1.8e-05, + "num_tokens": 141354.0, + "mean_token_accuracy": 0.9344422817230225, + "epoch": 0.203, + "step": 203 + }, + { + "loss": 0.2535, + "grad_norm": 5.625178337097168, + "learning_rate": 1.7990000000000002e-05, + "num_tokens": 141957.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.204, + "step": 204 + }, + { + "loss": 0.215, + "grad_norm": 1.0774091482162476, + "learning_rate": 1.798e-05, + "num_tokens": 142560.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.205, + "step": 205 + }, + { + "loss": 0.2797, + "grad_norm": 1.5909628868103027, + "learning_rate": 1.7970000000000002e-05, + "num_tokens": 143163.0, + "mean_token_accuracy": 0.941763699054718, + "epoch": 0.206, + "step": 206 + }, + { + "loss": 0.285, + "grad_norm": 1.3718655109405518, + "learning_rate": 1.796e-05, + "num_tokens": 144187.0, + "mean_token_accuracy": 0.9334638118743896, + "epoch": 0.207, + "step": 207 + }, + { + "loss": 0.2174, + "grad_norm": 3.416680335998535, + "learning_rate": 1.795e-05, + "num_tokens": 144369.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.208, + "step": 208 + }, + { + "loss": 0.2612, + "grad_norm": 0.9197150468826294, + "learning_rate": 1.794e-05, + "num_tokens": 145393.0, + "mean_token_accuracy": 0.9432485103607178, + "epoch": 0.209, + "step": 209 + }, + { + "loss": 0.2775, + "grad_norm": 1.2657712697982788, + "learning_rate": 1.793e-05, + "num_tokens": 145996.0, + "mean_token_accuracy": 0.9384359121322632, + "epoch": 0.21, + "step": 210 + }, + { + "loss": 0.2143, + "grad_norm": 1.0708510875701904, + "learning_rate": 1.792e-05, + "num_tokens": 146599.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.211, + "step": 211 + }, + { + "loss": 0.2424, + "grad_norm": 1.6526345014572144, + "learning_rate": 1.791e-05, + "num_tokens": 147202.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.212, + "step": 212 + }, + { + "loss": 0.2205, + "grad_norm": 1.5705641508102417, + "learning_rate": 1.79e-05, + "num_tokens": 148226.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.213, + "step": 213 + }, + { + "loss": 0.1932, + "grad_norm": 1.7598477602005005, + "learning_rate": 1.789e-05, + "num_tokens": 148408.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.214, + "step": 214 + }, + { + "loss": 0.2544, + "grad_norm": 1.5029298067092896, + "learning_rate": 1.788e-05, + "num_tokens": 149011.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.215, + "step": 215 + }, + { + "loss": 0.212, + "grad_norm": 1.3078054189682007, + "learning_rate": 1.787e-05, + "num_tokens": 149614.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.216, + "step": 216 + }, + { + "loss": 0.2047, + "grad_norm": 1.5083431005477905, + "learning_rate": 1.7860000000000002e-05, + "num_tokens": 150217.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.217, + "step": 217 + }, + { + "loss": 0.1822, + "grad_norm": 2.5195505619049072, + "learning_rate": 1.785e-05, + "num_tokens": 150399.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.218, + "step": 218 + }, + { + "loss": 0.2084, + "grad_norm": 1.026092529296875, + "learning_rate": 1.7840000000000002e-05, + "num_tokens": 151423.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.219, + "step": 219 + }, + { + "loss": 0.1761, + "grad_norm": 1.5038201808929443, + "learning_rate": 1.783e-05, + "num_tokens": 151605.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.22, + "step": 220 + }, + { + "loss": 0.221, + "grad_norm": 1.1938914060592651, + "learning_rate": 1.7820000000000002e-05, + "num_tokens": 152629.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.221, + "step": 221 + }, + { + "loss": 0.1888, + "grad_norm": 0.9352293610572815, + "learning_rate": 1.781e-05, + "num_tokens": 153232.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.222, + "step": 222 + }, + { + "loss": 0.2145, + "grad_norm": 1.26731538772583, + "learning_rate": 1.7800000000000002e-05, + "num_tokens": 153835.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.223, + "step": 223 + }, + { + "loss": 0.1964, + "grad_norm": 0.8970909118652344, + "learning_rate": 1.779e-05, + "num_tokens": 154438.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.224, + "step": 224 + }, + { + "loss": 0.1925, + "grad_norm": 0.7554095983505249, + "learning_rate": 1.7780000000000003e-05, + "num_tokens": 155041.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.225, + "step": 225 + }, + { + "loss": 0.2031, + "grad_norm": 0.8807339072227478, + "learning_rate": 1.777e-05, + "num_tokens": 156065.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.226, + "step": 226 + }, + { + "loss": 0.169, + "grad_norm": 2.9795708656311035, + "learning_rate": 1.7760000000000003e-05, + "num_tokens": 156247.0, + "mean_token_accuracy": 0.9666666388511658, + "epoch": 0.227, + "step": 227 + }, + { + "loss": 0.3398, + "grad_norm": 3.4801158905029297, + "learning_rate": 1.775e-05, + "num_tokens": 157271.0, + "mean_token_accuracy": 0.9295498728752136, + "epoch": 0.228, + "step": 228 + }, + { + "loss": 0.17, + "grad_norm": 1.4093260765075684, + "learning_rate": 1.7740000000000003e-05, + "num_tokens": 157874.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.229, + "step": 229 + }, + { + "loss": 0.1809, + "grad_norm": 1.2199844121932983, + "learning_rate": 1.773e-05, + "num_tokens": 158477.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.23, + "step": 230 + }, + { + "loss": 0.2213, + "grad_norm": 1.1079366207122803, + "learning_rate": 1.7720000000000003e-05, + "num_tokens": 159501.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.231, + "step": 231 + }, + { + "loss": 0.1846, + "grad_norm": 1.3526744842529297, + "learning_rate": 1.771e-05, + "num_tokens": 160104.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.232, + "step": 232 + }, + { + "loss": 0.1756, + "grad_norm": 1.2986876964569092, + "learning_rate": 1.77e-05, + "num_tokens": 160707.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.233, + "step": 233 + }, + { + "loss": 0.1842, + "grad_norm": 0.9565788507461548, + "learning_rate": 1.7690000000000002e-05, + "num_tokens": 161310.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.234, + "step": 234 + }, + { + "loss": 0.1696, + "grad_norm": 0.8098346590995789, + "learning_rate": 1.768e-05, + "num_tokens": 161913.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.235, + "step": 235 + }, + { + "loss": 0.2198, + "grad_norm": 1.4016491174697876, + "learning_rate": 1.7670000000000002e-05, + "num_tokens": 162516.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 0.236, + "step": 236 + }, + { + "loss": 0.1451, + "grad_norm": 1.9594753980636597, + "learning_rate": 1.766e-05, + "num_tokens": 162698.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.237, + "step": 237 + }, + { + "loss": 0.1681, + "grad_norm": 1.2831525802612305, + "learning_rate": 1.7650000000000002e-05, + "num_tokens": 163722.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.238, + "step": 238 + }, + { + "loss": 0.1387, + "grad_norm": 1.2211278676986694, + "learning_rate": 1.764e-05, + "num_tokens": 163904.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.239, + "step": 239 + }, + { + "loss": 0.1599, + "grad_norm": 1.4370752573013306, + "learning_rate": 1.763e-05, + "num_tokens": 164507.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.24, + "step": 240 + }, + { + "loss": 0.1335, + "grad_norm": 1.3081690073013306, + "learning_rate": 1.762e-05, + "num_tokens": 164689.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.241, + "step": 241 + }, + { + "loss": 0.1646, + "grad_norm": 1.0794700384140015, + "learning_rate": 1.761e-05, + "num_tokens": 165292.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.242, + "step": 242 + }, + { + "loss": 0.1283, + "grad_norm": 1.1635990142822266, + "learning_rate": 1.76e-05, + "num_tokens": 165474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.243, + "step": 243 + }, + { + "loss": 0.1394, + "grad_norm": 1.4341994524002075, + "learning_rate": 1.759e-05, + "num_tokens": 166077.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.244, + "step": 244 + }, + { + "loss": 0.1287, + "grad_norm": 1.2540855407714844, + "learning_rate": 1.758e-05, + "num_tokens": 166680.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.245, + "step": 245 + }, + { + "loss": 0.1243, + "grad_norm": 1.1882375478744507, + "learning_rate": 1.757e-05, + "num_tokens": 167283.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.246, + "step": 246 + }, + { + "loss": 0.1605, + "grad_norm": 0.9329596161842346, + "learning_rate": 1.756e-05, + "num_tokens": 167886.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.247, + "step": 247 + }, + { + "loss": 0.1186, + "grad_norm": 1.8514982461929321, + "learning_rate": 1.755e-05, + "num_tokens": 168068.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.248, + "step": 248 + }, + { + "loss": 0.142, + "grad_norm": 0.9530863761901855, + "learning_rate": 1.754e-05, + "num_tokens": 169092.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.249, + "step": 249 + }, + { + "loss": 0.1292, + "grad_norm": 1.2723866701126099, + "learning_rate": 1.753e-05, + "num_tokens": 169695.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.25, + "step": 250 + }, + { + "loss": 0.1302, + "grad_norm": 1.8454350233078003, + "learning_rate": 1.752e-05, + "num_tokens": 170298.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.251, + "step": 251 + }, + { + "loss": 0.1214, + "grad_norm": 0.9049779176712036, + "learning_rate": 1.751e-05, + "num_tokens": 171322.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.252, + "step": 252 + }, + { + "loss": 0.1365, + "grad_norm": 1.0442427396774292, + "learning_rate": 1.7500000000000002e-05, + "num_tokens": 171925.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.253, + "step": 253 + }, + { + "loss": 0.1294, + "grad_norm": 1.2227778434753418, + "learning_rate": 1.7490000000000004e-05, + "num_tokens": 172528.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.254, + "step": 254 + }, + { + "loss": 0.1232, + "grad_norm": 4.070680618286133, + "learning_rate": 1.7480000000000002e-05, + "num_tokens": 172710.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.255, + "step": 255 + }, + { + "loss": 0.1385, + "grad_norm": 1.0301059484481812, + "learning_rate": 1.7470000000000004e-05, + "num_tokens": 173734.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.256, + "step": 256 + }, + { + "loss": 0.1155, + "grad_norm": 3.19741153717041, + "learning_rate": 1.7460000000000002e-05, + "num_tokens": 173916.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.257, + "step": 257 + }, + { + "loss": 0.1759, + "grad_norm": 1.4615155458450317, + "learning_rate": 1.7450000000000004e-05, + "num_tokens": 174940.0, + "mean_token_accuracy": 0.9579256176948547, + "epoch": 0.258, + "step": 258 + }, + { + "loss": 0.0943, + "grad_norm": 1.0655325651168823, + "learning_rate": 1.7440000000000002e-05, + "num_tokens": 175543.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.259, + "step": 259 + }, + { + "loss": 0.161, + "grad_norm": 1.490907907485962, + "learning_rate": 1.743e-05, + "num_tokens": 176146.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.26, + "step": 260 + }, + { + "loss": 0.1444, + "grad_norm": 1.0901517868041992, + "learning_rate": 1.7420000000000003e-05, + "num_tokens": 176749.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.261, + "step": 261 + }, + { + "loss": 0.1236, + "grad_norm": 0.9282501339912415, + "learning_rate": 1.741e-05, + "num_tokens": 177352.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.262, + "step": 262 + }, + { + "loss": 0.1433, + "grad_norm": 0.9189746379852295, + "learning_rate": 1.7400000000000003e-05, + "num_tokens": 178376.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.263, + "step": 263 + }, + { + "loss": 0.2009, + "grad_norm": 1.492387294769287, + "learning_rate": 1.739e-05, + "num_tokens": 179400.0, + "mean_token_accuracy": 0.9481409192085266, + "epoch": 0.264, + "step": 264 + }, + { + "loss": 0.1043, + "grad_norm": 2.522902250289917, + "learning_rate": 1.7380000000000003e-05, + "num_tokens": 179582.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.265, + "step": 265 + }, + { + "loss": 0.1314, + "grad_norm": 0.9554713368415833, + "learning_rate": 1.737e-05, + "num_tokens": 180185.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.266, + "step": 266 + }, + { + "loss": 0.2562, + "grad_norm": 2.1374523639678955, + "learning_rate": 1.736e-05, + "num_tokens": 180788.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.267, + "step": 267 + }, + { + "loss": 0.1107, + "grad_norm": 1.1309645175933838, + "learning_rate": 1.735e-05, + "num_tokens": 181391.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.268, + "step": 268 + }, + { + "loss": 0.1078, + "grad_norm": 1.058072805404663, + "learning_rate": 1.734e-05, + "num_tokens": 181994.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.269, + "step": 269 + }, + { + "loss": 0.2352, + "grad_norm": 1.608152151107788, + "learning_rate": 1.7330000000000002e-05, + "num_tokens": 182597.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.27, + "step": 270 + }, + { + "loss": 0.1622, + "grad_norm": 1.0934463739395142, + "learning_rate": 1.732e-05, + "num_tokens": 183621.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.271, + "step": 271 + }, + { + "loss": 0.1006, + "grad_norm": 2.80008864402771, + "learning_rate": 1.7310000000000002e-05, + "num_tokens": 183803.0, + "mean_token_accuracy": 0.9833333492279053, + "epoch": 0.272, + "step": 272 + }, + { + "loss": 0.0997, + "grad_norm": 2.4563705921173096, + "learning_rate": 1.73e-05, + "num_tokens": 183985.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.273, + "step": 273 + }, + { + "loss": 0.1004, + "grad_norm": 1.0290199518203735, + "learning_rate": 1.7290000000000002e-05, + "num_tokens": 184588.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.274, + "step": 274 + }, + { + "loss": 0.0923, + "grad_norm": 1.122008204460144, + "learning_rate": 1.728e-05, + "num_tokens": 185191.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.275, + "step": 275 + }, + { + "loss": 0.1115, + "grad_norm": 1.0187288522720337, + "learning_rate": 1.7270000000000002e-05, + "num_tokens": 185794.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.276, + "step": 276 + }, + { + "loss": 0.1316, + "grad_norm": 1.0224473476409912, + "learning_rate": 1.726e-05, + "num_tokens": 186397.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.277, + "step": 277 + }, + { + "loss": 0.1278, + "grad_norm": 0.842353880405426, + "learning_rate": 1.7250000000000003e-05, + "num_tokens": 187000.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.278, + "step": 278 + }, + { + "loss": 0.1162, + "grad_norm": 0.9121952056884766, + "learning_rate": 1.724e-05, + "num_tokens": 187603.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.279, + "step": 279 + }, + { + "loss": 0.1814, + "grad_norm": 1.5726698637008667, + "learning_rate": 1.7230000000000003e-05, + "num_tokens": 188206.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.28, + "step": 280 + }, + { + "loss": 0.1851, + "grad_norm": 1.1932828426361084, + "learning_rate": 1.722e-05, + "num_tokens": 189230.0, + "mean_token_accuracy": 0.9549902081489563, + "epoch": 0.281, + "step": 281 + }, + { + "loss": 0.0917, + "grad_norm": 0.8520850539207458, + "learning_rate": 1.7210000000000003e-05, + "num_tokens": 189833.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.282, + "step": 282 + }, + { + "loss": 0.1059, + "grad_norm": 0.9106554985046387, + "learning_rate": 1.72e-05, + "num_tokens": 190436.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.283, + "step": 283 + }, + { + "loss": 0.1725, + "grad_norm": 0.8985360860824585, + "learning_rate": 1.7190000000000003e-05, + "num_tokens": 191460.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.284, + "step": 284 + }, + { + "loss": 0.1202, + "grad_norm": 0.7162396311759949, + "learning_rate": 1.718e-05, + "num_tokens": 192484.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.285, + "step": 285 + }, + { + "loss": 0.1194, + "grad_norm": 1.0312271118164062, + "learning_rate": 1.7170000000000003e-05, + "num_tokens": 193087.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.286, + "step": 286 + }, + { + "loss": 0.1191, + "grad_norm": 0.7298357486724854, + "learning_rate": 1.7160000000000002e-05, + "num_tokens": 194111.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.287, + "step": 287 + }, + { + "loss": 0.0851, + "grad_norm": 1.0276390314102173, + "learning_rate": 1.7150000000000004e-05, + "num_tokens": 194714.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.288, + "step": 288 + }, + { + "loss": 0.2002, + "grad_norm": 1.568818211555481, + "learning_rate": 1.7140000000000002e-05, + "num_tokens": 195317.0, + "mean_token_accuracy": 0.9534109830856323, + "epoch": 0.289, + "step": 289 + }, + { + "loss": 0.1024, + "grad_norm": 4.6199164390563965, + "learning_rate": 1.7130000000000004e-05, + "num_tokens": 195499.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.29, + "step": 290 + }, + { + "loss": 0.0943, + "grad_norm": 3.7376346588134766, + "learning_rate": 1.7120000000000002e-05, + "num_tokens": 195681.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.291, + "step": 291 + }, + { + "loss": 0.1661, + "grad_norm": 1.5466440916061401, + "learning_rate": 1.711e-05, + "num_tokens": 196284.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.292, + "step": 292 + }, + { + "loss": 0.1147, + "grad_norm": 0.7724754214286804, + "learning_rate": 1.7100000000000002e-05, + "num_tokens": 196887.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.293, + "step": 293 + }, + { + "loss": 0.1026, + "grad_norm": 0.9492689371109009, + "learning_rate": 1.709e-05, + "num_tokens": 197490.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.294, + "step": 294 + }, + { + "loss": 0.1205, + "grad_norm": 1.0238693952560425, + "learning_rate": 1.7080000000000002e-05, + "num_tokens": 198093.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.295, + "step": 295 + }, + { + "loss": 0.212, + "grad_norm": 1.9184578657150269, + "learning_rate": 1.707e-05, + "num_tokens": 198696.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 0.296, + "step": 296 + }, + { + "loss": 0.1069, + "grad_norm": 0.9579708576202393, + "learning_rate": 1.7060000000000003e-05, + "num_tokens": 199299.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.297, + "step": 297 + }, + { + "loss": 0.0898, + "grad_norm": 0.803164005279541, + "learning_rate": 1.705e-05, + "num_tokens": 199902.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.298, + "step": 298 + }, + { + "loss": 0.1047, + "grad_norm": 0.9029723405838013, + "learning_rate": 1.704e-05, + "num_tokens": 200505.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.299, + "step": 299 + }, + { + "loss": 0.1009, + "grad_norm": 0.8454239964485168, + "learning_rate": 1.703e-05, + "num_tokens": 201529.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.3, + "step": 300 + }, + { + "loss": 0.12, + "grad_norm": 1.1490987539291382, + "learning_rate": 1.702e-05, + "num_tokens": 202132.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.301, + "step": 301 + }, + { + "loss": 0.0849, + "grad_norm": 3.5246822834014893, + "learning_rate": 1.701e-05, + "num_tokens": 202314.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.302, + "step": 302 + }, + { + "loss": 0.1273, + "grad_norm": 0.8553935885429382, + "learning_rate": 1.7e-05, + "num_tokens": 203338.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.303, + "step": 303 + }, + { + "loss": 0.1041, + "grad_norm": 0.8264068365097046, + "learning_rate": 1.699e-05, + "num_tokens": 204362.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.304, + "step": 304 + }, + { + "loss": 0.168, + "grad_norm": 1.5330549478530884, + "learning_rate": 1.698e-05, + "num_tokens": 204965.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.305, + "step": 305 + }, + { + "loss": 0.1058, + "grad_norm": 0.7781637907028198, + "learning_rate": 1.6970000000000002e-05, + "num_tokens": 205989.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.306, + "step": 306 + }, + { + "loss": 0.0869, + "grad_norm": 3.432866334915161, + "learning_rate": 1.696e-05, + "num_tokens": 206171.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.307, + "step": 307 + }, + { + "loss": 0.0821, + "grad_norm": 0.8514496684074402, + "learning_rate": 1.6950000000000002e-05, + "num_tokens": 207195.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.308, + "step": 308 + }, + { + "loss": 0.1268, + "grad_norm": 1.127798318862915, + "learning_rate": 1.694e-05, + "num_tokens": 207798.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.309, + "step": 309 + }, + { + "loss": 0.1019, + "grad_norm": 1.4073783159255981, + "learning_rate": 1.6930000000000002e-05, + "num_tokens": 208401.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.31, + "step": 310 + }, + { + "loss": 0.1184, + "grad_norm": 1.0207278728485107, + "learning_rate": 1.692e-05, + "num_tokens": 209004.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.311, + "step": 311 + }, + { + "loss": 0.0843, + "grad_norm": 0.8584610223770142, + "learning_rate": 1.6910000000000002e-05, + "num_tokens": 209607.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.312, + "step": 312 + }, + { + "loss": 0.1054, + "grad_norm": 0.8356302976608276, + "learning_rate": 1.69e-05, + "num_tokens": 210631.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.313, + "step": 313 + }, + { + "loss": 0.1067, + "grad_norm": 0.9864552021026611, + "learning_rate": 1.6890000000000003e-05, + "num_tokens": 211234.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.314, + "step": 314 + }, + { + "loss": 0.0743, + "grad_norm": 2.7141575813293457, + "learning_rate": 1.688e-05, + "num_tokens": 211416.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.315, + "step": 315 + }, + { + "loss": 0.0712, + "grad_norm": 2.0179872512817383, + "learning_rate": 1.6870000000000003e-05, + "num_tokens": 211598.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.316, + "step": 316 + }, + { + "loss": 0.0974, + "grad_norm": 0.8369526863098145, + "learning_rate": 1.686e-05, + "num_tokens": 212622.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.317, + "step": 317 + }, + { + "loss": 0.0681, + "grad_norm": 1.5807322263717651, + "learning_rate": 1.6850000000000003e-05, + "num_tokens": 212804.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.318, + "step": 318 + }, + { + "loss": 0.1056, + "grad_norm": 0.9928346872329712, + "learning_rate": 1.684e-05, + "num_tokens": 213407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.319, + "step": 319 + }, + { + "loss": 0.0641, + "grad_norm": 1.4489860534667969, + "learning_rate": 1.6830000000000003e-05, + "num_tokens": 213589.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.32, + "step": 320 + }, + { + "loss": 0.1246, + "grad_norm": 1.0914169549942017, + "learning_rate": 1.682e-05, + "num_tokens": 214613.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.321, + "step": 321 + }, + { + "loss": 0.0848, + "grad_norm": 1.0644992589950562, + "learning_rate": 1.6810000000000003e-05, + "num_tokens": 215216.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.322, + "step": 322 + }, + { + "loss": 0.1016, + "grad_norm": 0.9731497168540955, + "learning_rate": 1.6800000000000002e-05, + "num_tokens": 215819.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.323, + "step": 323 + }, + { + "loss": 0.1109, + "grad_norm": 0.79487144947052, + "learning_rate": 1.679e-05, + "num_tokens": 216843.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.324, + "step": 324 + }, + { + "loss": 0.1212, + "grad_norm": 0.84676593542099, + "learning_rate": 1.6780000000000002e-05, + "num_tokens": 217867.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.325, + "step": 325 + }, + { + "loss": 0.0666, + "grad_norm": 0.9512737393379211, + "learning_rate": 1.677e-05, + "num_tokens": 218470.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.326, + "step": 326 + }, + { + "loss": 0.0712, + "grad_norm": 4.367532730102539, + "learning_rate": 1.6760000000000002e-05, + "num_tokens": 218652.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.327, + "step": 327 + }, + { + "loss": 0.0935, + "grad_norm": 1.0136102437973022, + "learning_rate": 1.675e-05, + "num_tokens": 219255.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.328, + "step": 328 + }, + { + "loss": 0.0958, + "grad_norm": 0.7100754380226135, + "learning_rate": 1.6740000000000002e-05, + "num_tokens": 220279.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.329, + "step": 329 + }, + { + "loss": 0.0768, + "grad_norm": 1.1508314609527588, + "learning_rate": 1.673e-05, + "num_tokens": 220882.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.33, + "step": 330 + }, + { + "loss": 0.0923, + "grad_norm": 0.6459121108055115, + "learning_rate": 1.672e-05, + "num_tokens": 221906.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.331, + "step": 331 + }, + { + "loss": 0.1377, + "grad_norm": 1.2035995721817017, + "learning_rate": 1.671e-05, + "num_tokens": 222930.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.332, + "step": 332 + }, + { + "loss": 0.1404, + "grad_norm": 1.2314244508743286, + "learning_rate": 1.67e-05, + "num_tokens": 223954.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.333, + "step": 333 + }, + { + "loss": 0.0721, + "grad_norm": 1.5412744283676147, + "learning_rate": 1.669e-05, + "num_tokens": 224557.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.334, + "step": 334 + }, + { + "loss": 0.093, + "grad_norm": 1.1724885702133179, + "learning_rate": 1.668e-05, + "num_tokens": 225581.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.335, + "step": 335 + }, + { + "loss": 0.0948, + "grad_norm": 1.1767406463623047, + "learning_rate": 1.667e-05, + "num_tokens": 226184.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.336, + "step": 336 + }, + { + "loss": 0.1091, + "grad_norm": 0.8806567192077637, + "learning_rate": 1.666e-05, + "num_tokens": 227208.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.337, + "step": 337 + }, + { + "loss": 0.1123, + "grad_norm": 0.7883885502815247, + "learning_rate": 1.665e-05, + "num_tokens": 228232.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.338, + "step": 338 + }, + { + "loss": 0.1244, + "grad_norm": 1.631230115890503, + "learning_rate": 1.664e-05, + "num_tokens": 229256.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.339, + "step": 339 + }, + { + "loss": 0.0632, + "grad_norm": 3.258474588394165, + "learning_rate": 1.6630000000000002e-05, + "num_tokens": 229438.0, + "mean_token_accuracy": 0.9777777791023254, + "epoch": 0.34, + "step": 340 + }, + { + "loss": 0.1013, + "grad_norm": 0.8189828395843506, + "learning_rate": 1.662e-05, + "num_tokens": 230462.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.341, + "step": 341 + }, + { + "loss": 0.0942, + "grad_norm": 1.1684011220932007, + "learning_rate": 1.6610000000000002e-05, + "num_tokens": 231065.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.342, + "step": 342 + }, + { + "loss": 0.1017, + "grad_norm": 0.8611066341400146, + "learning_rate": 1.66e-05, + "num_tokens": 232089.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.343, + "step": 343 + }, + { + "loss": 0.117, + "grad_norm": 1.0313893556594849, + "learning_rate": 1.6590000000000002e-05, + "num_tokens": 233113.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.344, + "step": 344 + }, + { + "loss": 0.0756, + "grad_norm": 0.7209411859512329, + "learning_rate": 1.658e-05, + "num_tokens": 234137.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.345, + "step": 345 + }, + { + "loss": 0.0577, + "grad_norm": 2.9187569618225098, + "learning_rate": 1.6570000000000002e-05, + "num_tokens": 234319.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.346, + "step": 346 + }, + { + "loss": 0.084, + "grad_norm": 0.8220289349555969, + "learning_rate": 1.656e-05, + "num_tokens": 234922.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.347, + "step": 347 + }, + { + "loss": 0.0972, + "grad_norm": 0.8616042733192444, + "learning_rate": 1.6550000000000002e-05, + "num_tokens": 235946.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.348, + "step": 348 + }, + { + "loss": 0.1023, + "grad_norm": 0.815368115901947, + "learning_rate": 1.654e-05, + "num_tokens": 236970.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.349, + "step": 349 + }, + { + "loss": 0.0811, + "grad_norm": 0.9861577749252319, + "learning_rate": 1.6530000000000003e-05, + "num_tokens": 237994.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.35, + "step": 350 + }, + { + "loss": 0.0603, + "grad_norm": 2.2262823581695557, + "learning_rate": 1.652e-05, + "num_tokens": 238597.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.351, + "step": 351 + }, + { + "loss": 0.1032, + "grad_norm": 0.8391550183296204, + "learning_rate": 1.6510000000000003e-05, + "num_tokens": 239621.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.352, + "step": 352 + }, + { + "loss": 0.1072, + "grad_norm": 0.7724818587303162, + "learning_rate": 1.65e-05, + "num_tokens": 240224.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.353, + "step": 353 + }, + { + "loss": 0.0929, + "grad_norm": 0.7557445764541626, + "learning_rate": 1.6490000000000003e-05, + "num_tokens": 241248.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.354, + "step": 354 + }, + { + "loss": 0.1137, + "grad_norm": 0.9282433390617371, + "learning_rate": 1.648e-05, + "num_tokens": 241851.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.355, + "step": 355 + }, + { + "loss": 0.0894, + "grad_norm": 0.7736088037490845, + "learning_rate": 1.647e-05, + "num_tokens": 242875.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.356, + "step": 356 + }, + { + "loss": 0.0994, + "grad_norm": 0.921101450920105, + "learning_rate": 1.646e-05, + "num_tokens": 243478.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.357, + "step": 357 + }, + { + "loss": 0.1344, + "grad_norm": 1.202441930770874, + "learning_rate": 1.645e-05, + "num_tokens": 244502.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.358, + "step": 358 + }, + { + "loss": 0.099, + "grad_norm": 0.9869626760482788, + "learning_rate": 1.6440000000000002e-05, + "num_tokens": 245105.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.359, + "step": 359 + }, + { + "loss": 0.1427, + "grad_norm": 1.0982835292816162, + "learning_rate": 1.643e-05, + "num_tokens": 246129.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.36, + "step": 360 + }, + { + "loss": 0.0681, + "grad_norm": 0.9747059345245361, + "learning_rate": 1.6420000000000002e-05, + "num_tokens": 246732.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.361, + "step": 361 + }, + { + "loss": 0.0716, + "grad_norm": 1.0156511068344116, + "learning_rate": 1.641e-05, + "num_tokens": 247335.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.362, + "step": 362 + }, + { + "loss": 0.4351, + "grad_norm": 6.821441650390625, + "learning_rate": 1.64e-05, + "num_tokens": 247938.0, + "mean_token_accuracy": 0.9151414036750793, + "epoch": 0.363, + "step": 363 + }, + { + "loss": 0.0743, + "grad_norm": 0.8414461016654968, + "learning_rate": 1.639e-05, + "num_tokens": 248541.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.364, + "step": 364 + }, + { + "loss": 0.0844, + "grad_norm": 0.9070030450820923, + "learning_rate": 1.638e-05, + "num_tokens": 249565.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.365, + "step": 365 + }, + { + "loss": 0.0697, + "grad_norm": 0.9378820657730103, + "learning_rate": 1.637e-05, + "num_tokens": 250589.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.366, + "step": 366 + }, + { + "loss": 0.0452, + "grad_norm": 2.362260341644287, + "learning_rate": 1.636e-05, + "num_tokens": 250771.0, + "mean_token_accuracy": 1.0, + "epoch": 0.367, + "step": 367 + }, + { + "loss": 0.0658, + "grad_norm": 0.9978799819946289, + "learning_rate": 1.635e-05, + "num_tokens": 251374.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.368, + "step": 368 + }, + { + "loss": 0.139, + "grad_norm": 1.415355920791626, + "learning_rate": 1.634e-05, + "num_tokens": 252398.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 0.369, + "step": 369 + }, + { + "loss": 0.0418, + "grad_norm": 1.903359293937683, + "learning_rate": 1.633e-05, + "num_tokens": 252580.0, + "mean_token_accuracy": 1.0, + "epoch": 0.37, + "step": 370 + }, + { + "loss": 0.0788, + "grad_norm": 1.4235386848449707, + "learning_rate": 1.632e-05, + "num_tokens": 253183.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.371, + "step": 371 + }, + { + "loss": 0.0378, + "grad_norm": 1.322859525680542, + "learning_rate": 1.631e-05, + "num_tokens": 253365.0, + "mean_token_accuracy": 1.0, + "epoch": 0.372, + "step": 372 + }, + { + "loss": 0.0826, + "grad_norm": 0.9231904745101929, + "learning_rate": 1.63e-05, + "num_tokens": 253968.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.373, + "step": 373 + }, + { + "loss": 0.0338, + "grad_norm": 1.1984413862228394, + "learning_rate": 1.629e-05, + "num_tokens": 254150.0, + "mean_token_accuracy": 1.0, + "epoch": 0.374, + "step": 374 + }, + { + "loss": 0.0879, + "grad_norm": 1.0001438856124878, + "learning_rate": 1.628e-05, + "num_tokens": 255174.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.375, + "step": 375 + }, + { + "loss": 0.3459, + "grad_norm": 5.406961917877197, + "learning_rate": 1.6270000000000002e-05, + "num_tokens": 255777.0, + "mean_token_accuracy": 0.9234609007835388, + "epoch": 0.376, + "step": 376 + }, + { + "loss": 0.0886, + "grad_norm": 1.1185731887817383, + "learning_rate": 1.626e-05, + "num_tokens": 256380.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.377, + "step": 377 + }, + { + "loss": 0.0292, + "grad_norm": 1.0933966636657715, + "learning_rate": 1.6250000000000002e-05, + "num_tokens": 256562.0, + "mean_token_accuracy": 1.0, + "epoch": 0.378, + "step": 378 + }, + { + "loss": 0.0886, + "grad_norm": 1.093742847442627, + "learning_rate": 1.6240000000000004e-05, + "num_tokens": 257165.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.379, + "step": 379 + }, + { + "loss": 0.0697, + "grad_norm": 1.4595232009887695, + "learning_rate": 1.6230000000000002e-05, + "num_tokens": 257768.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.38, + "step": 380 + }, + { + "loss": 0.0265, + "grad_norm": 1.3366799354553223, + "learning_rate": 1.6220000000000004e-05, + "num_tokens": 257950.0, + "mean_token_accuracy": 1.0, + "epoch": 0.381, + "step": 381 + }, + { + "loss": 0.0886, + "grad_norm": 1.2207424640655518, + "learning_rate": 1.6210000000000002e-05, + "num_tokens": 258553.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.382, + "step": 382 + }, + { + "loss": 0.083, + "grad_norm": 1.0224359035491943, + "learning_rate": 1.62e-05, + "num_tokens": 259156.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.383, + "step": 383 + }, + { + "loss": 0.0936, + "grad_norm": 1.3029577732086182, + "learning_rate": 1.6190000000000003e-05, + "num_tokens": 260180.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.384, + "step": 384 + }, + { + "loss": 0.0825, + "grad_norm": 0.9989560842514038, + "learning_rate": 1.618e-05, + "num_tokens": 260783.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.385, + "step": 385 + }, + { + "loss": 0.0635, + "grad_norm": 0.817306399345398, + "learning_rate": 1.6170000000000003e-05, + "num_tokens": 261386.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.386, + "step": 386 + }, + { + "loss": 0.1573, + "grad_norm": 1.521423101425171, + "learning_rate": 1.616e-05, + "num_tokens": 261989.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.387, + "step": 387 + }, + { + "loss": 0.0281, + "grad_norm": 2.771691083908081, + "learning_rate": 1.6150000000000003e-05, + "num_tokens": 262171.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.388, + "step": 388 + }, + { + "loss": 0.0723, + "grad_norm": 0.718820333480835, + "learning_rate": 1.614e-05, + "num_tokens": 263195.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.389, + "step": 389 + }, + { + "loss": 0.029, + "grad_norm": 2.9579451084136963, + "learning_rate": 1.613e-05, + "num_tokens": 263377.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.39, + "step": 390 + }, + { + "loss": 0.0807, + "grad_norm": 0.7013575434684753, + "learning_rate": 1.612e-05, + "num_tokens": 263980.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.391, + "step": 391 + }, + { + "loss": 0.0899, + "grad_norm": 0.8263501524925232, + "learning_rate": 1.611e-05, + "num_tokens": 265004.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.392, + "step": 392 + }, + { + "loss": 0.085, + "grad_norm": 0.8872665762901306, + "learning_rate": 1.6100000000000002e-05, + "num_tokens": 265607.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.393, + "step": 393 + }, + { + "loss": 0.0642, + "grad_norm": 0.9370598196983337, + "learning_rate": 1.609e-05, + "num_tokens": 266210.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.394, + "step": 394 + }, + { + "loss": 0.0829, + "grad_norm": 0.8175517916679382, + "learning_rate": 1.6080000000000002e-05, + "num_tokens": 267234.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.395, + "step": 395 + }, + { + "loss": 0.0842, + "grad_norm": 0.8722137212753296, + "learning_rate": 1.607e-05, + "num_tokens": 267837.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.396, + "step": 396 + }, + { + "loss": 0.054, + "grad_norm": 0.9143010973930359, + "learning_rate": 1.6060000000000002e-05, + "num_tokens": 268440.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.397, + "step": 397 + }, + { + "loss": 0.0764, + "grad_norm": 0.9138529896736145, + "learning_rate": 1.605e-05, + "num_tokens": 269043.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.398, + "step": 398 + }, + { + "loss": 0.0443, + "grad_norm": 0.889714777469635, + "learning_rate": 1.6040000000000002e-05, + "num_tokens": 269646.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.399, + "step": 399 + }, + { + "loss": 0.0491, + "grad_norm": 0.864485502243042, + "learning_rate": 1.603e-05, + "num_tokens": 270249.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.4, + "step": 400 + }, + { + "loss": 0.0739, + "grad_norm": 0.881671667098999, + "learning_rate": 1.6020000000000002e-05, + "num_tokens": 271273.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.401, + "step": 401 + }, + { + "loss": 0.0268, + "grad_norm": 3.1773056983947754, + "learning_rate": 1.601e-05, + "num_tokens": 271455.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.402, + "step": 402 + }, + { + "loss": 0.0851, + "grad_norm": 0.8216137290000916, + "learning_rate": 1.6000000000000003e-05, + "num_tokens": 272058.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.403, + "step": 403 + }, + { + "loss": 0.0224, + "grad_norm": 2.446829319000244, + "learning_rate": 1.599e-05, + "num_tokens": 272240.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.404, + "step": 404 + }, + { + "loss": 0.0646, + "grad_norm": 1.87065589427948, + "learning_rate": 1.5980000000000003e-05, + "num_tokens": 272843.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.405, + "step": 405 + }, + { + "loss": 0.0921, + "grad_norm": 1.3701424598693848, + "learning_rate": 1.597e-05, + "num_tokens": 273867.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.406, + "step": 406 + }, + { + "loss": 0.0153, + "grad_norm": 1.378767967224121, + "learning_rate": 1.5960000000000003e-05, + "num_tokens": 274049.0, + "mean_token_accuracy": 1.0, + "epoch": 0.407, + "step": 407 + }, + { + "loss": 0.0517, + "grad_norm": 0.9267370700836182, + "learning_rate": 1.595e-05, + "num_tokens": 274652.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.408, + "step": 408 + }, + { + "loss": 0.0132, + "grad_norm": 0.8789790272712708, + "learning_rate": 1.5940000000000003e-05, + "num_tokens": 274834.0, + "mean_token_accuracy": 1.0, + "epoch": 0.409, + "step": 409 + }, + { + "loss": 0.1174, + "grad_norm": 1.7347341775894165, + "learning_rate": 1.593e-05, + "num_tokens": 275858.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 0.41, + "step": 410 + }, + { + "loss": 0.0701, + "grad_norm": 1.5347058773040771, + "learning_rate": 1.5920000000000003e-05, + "num_tokens": 276461.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.411, + "step": 411 + }, + { + "loss": 0.0559, + "grad_norm": 1.1168630123138428, + "learning_rate": 1.5910000000000002e-05, + "num_tokens": 277064.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.412, + "step": 412 + }, + { + "loss": 0.0107, + "grad_norm": 0.6622042655944824, + "learning_rate": 1.5900000000000004e-05, + "num_tokens": 277246.0, + "mean_token_accuracy": 1.0, + "epoch": 0.413, + "step": 413 + }, + { + "loss": 0.1178, + "grad_norm": 1.3859763145446777, + "learning_rate": 1.5890000000000002e-05, + "num_tokens": 278270.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.414, + "step": 414 + }, + { + "loss": 0.0964, + "grad_norm": 1.1624832153320312, + "learning_rate": 1.588e-05, + "num_tokens": 279294.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.415, + "step": 415 + }, + { + "loss": 0.0843, + "grad_norm": 1.0634915828704834, + "learning_rate": 1.5870000000000002e-05, + "num_tokens": 280318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.416, + "step": 416 + }, + { + "loss": 0.2256, + "grad_norm": 3.064754009246826, + "learning_rate": 1.586e-05, + "num_tokens": 281342.0, + "mean_token_accuracy": 0.946183979511261, + "epoch": 0.417, + "step": 417 + }, + { + "loss": 0.0515, + "grad_norm": 1.3590887784957886, + "learning_rate": 1.5850000000000002e-05, + "num_tokens": 281945.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.418, + "step": 418 + }, + { + "loss": 0.0163, + "grad_norm": 2.414504051208496, + "learning_rate": 1.584e-05, + "num_tokens": 282127.0, + "mean_token_accuracy": 1.0, + "epoch": 0.419, + "step": 419 + }, + { + "loss": 0.1156, + "grad_norm": 1.1143982410430908, + "learning_rate": 1.5830000000000003e-05, + "num_tokens": 283151.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.42, + "step": 420 + }, + { + "loss": 0.047, + "grad_norm": 0.7985422015190125, + "learning_rate": 1.582e-05, + "num_tokens": 283754.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.421, + "step": 421 + }, + { + "loss": 0.0174, + "grad_norm": 2.6907079219818115, + "learning_rate": 1.581e-05, + "num_tokens": 283936.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.422, + "step": 422 + }, + { + "loss": 0.0756, + "grad_norm": 1.169379711151123, + "learning_rate": 1.58e-05, + "num_tokens": 284539.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.423, + "step": 423 + }, + { + "loss": 0.2354, + "grad_norm": 3.1526973247528076, + "learning_rate": 1.579e-05, + "num_tokens": 285563.0, + "mean_token_accuracy": 0.9452054500579834, + "epoch": 0.424, + "step": 424 + }, + { + "loss": 0.0458, + "grad_norm": 0.7426862120628357, + "learning_rate": 1.578e-05, + "num_tokens": 286166.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.425, + "step": 425 + }, + { + "loss": 0.0511, + "grad_norm": 0.8618159294128418, + "learning_rate": 1.577e-05, + "num_tokens": 286769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.426, + "step": 426 + }, + { + "loss": 0.0657, + "grad_norm": 0.9042669534683228, + "learning_rate": 1.576e-05, + "num_tokens": 287793.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.427, + "step": 427 + }, + { + "loss": 0.1362, + "grad_norm": 1.7490906715393066, + "learning_rate": 1.575e-05, + "num_tokens": 288396.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 0.428, + "step": 428 + }, + { + "loss": 0.075, + "grad_norm": 1.2254105806350708, + "learning_rate": 1.5740000000000002e-05, + "num_tokens": 289420.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.429, + "step": 429 + }, + { + "loss": 0.0161, + "grad_norm": 2.119595527648926, + "learning_rate": 1.573e-05, + "num_tokens": 289602.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.43, + "step": 430 + }, + { + "loss": 0.0577, + "grad_norm": 0.7894997596740723, + "learning_rate": 1.5720000000000002e-05, + "num_tokens": 290205.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.431, + "step": 431 + }, + { + "loss": 0.1096, + "grad_norm": 1.0284491777420044, + "learning_rate": 1.571e-05, + "num_tokens": 291229.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 0.432, + "step": 432 + }, + { + "loss": 0.0674, + "grad_norm": 0.9232416152954102, + "learning_rate": 1.5700000000000002e-05, + "num_tokens": 292253.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.433, + "step": 433 + }, + { + "loss": 0.0671, + "grad_norm": 1.093686819076538, + "learning_rate": 1.569e-05, + "num_tokens": 292856.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.434, + "step": 434 + }, + { + "loss": 0.0768, + "grad_norm": 0.8534543514251709, + "learning_rate": 1.5680000000000002e-05, + "num_tokens": 293880.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.435, + "step": 435 + }, + { + "loss": 0.0882, + "grad_norm": 1.4358211755752563, + "learning_rate": 1.567e-05, + "num_tokens": 294483.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.436, + "step": 436 + }, + { + "loss": 0.0488, + "grad_norm": 1.3807297945022583, + "learning_rate": 1.5660000000000003e-05, + "num_tokens": 295086.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.437, + "step": 437 + }, + { + "loss": 0.0822, + "grad_norm": 1.7636574506759644, + "learning_rate": 1.565e-05, + "num_tokens": 296110.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.438, + "step": 438 + }, + { + "loss": 0.0943, + "grad_norm": 1.1918975114822388, + "learning_rate": 1.5640000000000003e-05, + "num_tokens": 297134.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.439, + "step": 439 + }, + { + "loss": 0.049, + "grad_norm": 1.1358352899551392, + "learning_rate": 1.563e-05, + "num_tokens": 297737.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.44, + "step": 440 + }, + { + "loss": 0.0829, + "grad_norm": 0.8242742419242859, + "learning_rate": 1.5620000000000003e-05, + "num_tokens": 298761.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.441, + "step": 441 + }, + { + "loss": 0.0734, + "grad_norm": 1.4186701774597168, + "learning_rate": 1.561e-05, + "num_tokens": 299364.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.442, + "step": 442 + }, + { + "loss": 0.0395, + "grad_norm": 0.9597113132476807, + "learning_rate": 1.5600000000000003e-05, + "num_tokens": 299967.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.443, + "step": 443 + }, + { + "loss": 0.0744, + "grad_norm": 1.404382348060608, + "learning_rate": 1.559e-05, + "num_tokens": 300570.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.444, + "step": 444 + }, + { + "loss": 0.0473, + "grad_norm": 0.8925930857658386, + "learning_rate": 1.5580000000000003e-05, + "num_tokens": 301594.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 0.445, + "step": 445 + }, + { + "loss": 0.071, + "grad_norm": 1.262951135635376, + "learning_rate": 1.5570000000000002e-05, + "num_tokens": 302197.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.446, + "step": 446 + }, + { + "loss": 0.0526, + "grad_norm": 1.162405252456665, + "learning_rate": 1.556e-05, + "num_tokens": 302800.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.447, + "step": 447 + }, + { + "loss": 0.2271, + "grad_norm": 2.828556537628174, + "learning_rate": 1.5550000000000002e-05, + "num_tokens": 303824.0, + "mean_token_accuracy": 0.9471624493598938, + "epoch": 0.448, + "step": 448 + }, + { + "loss": 0.1001, + "grad_norm": 1.7923780679702759, + "learning_rate": 1.554e-05, + "num_tokens": 304427.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.449, + "step": 449 + }, + { + "loss": 0.0787, + "grad_norm": 1.3813443183898926, + "learning_rate": 1.5530000000000002e-05, + "num_tokens": 305451.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.45, + "step": 450 + }, + { + "loss": 0.0172, + "grad_norm": 2.7238848209381104, + "learning_rate": 1.552e-05, + "num_tokens": 305633.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.451, + "step": 451 + }, + { + "loss": 0.0643, + "grad_norm": 1.0591074228286743, + "learning_rate": 1.5510000000000002e-05, + "num_tokens": 306236.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.452, + "step": 452 + }, + { + "loss": 0.0674, + "grad_norm": 0.9203467965126038, + "learning_rate": 1.55e-05, + "num_tokens": 306839.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.453, + "step": 453 + }, + { + "loss": 0.0729, + "grad_norm": 1.240227222442627, + "learning_rate": 1.549e-05, + "num_tokens": 307863.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.454, + "step": 454 + }, + { + "loss": 0.0597, + "grad_norm": 1.1434822082519531, + "learning_rate": 1.548e-05, + "num_tokens": 308466.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.455, + "step": 455 + }, + { + "loss": 0.0665, + "grad_norm": 0.814992368221283, + "learning_rate": 1.547e-05, + "num_tokens": 309069.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.456, + "step": 456 + }, + { + "loss": 0.0658, + "grad_norm": 1.0612773895263672, + "learning_rate": 1.546e-05, + "num_tokens": 310093.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.457, + "step": 457 + }, + { + "loss": 0.0662, + "grad_norm": 0.9019358158111572, + "learning_rate": 1.545e-05, + "num_tokens": 311117.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.458, + "step": 458 + }, + { + "loss": 0.1038, + "grad_norm": 1.3925731182098389, + "learning_rate": 1.544e-05, + "num_tokens": 312141.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.459, + "step": 459 + }, + { + "loss": 0.06, + "grad_norm": 0.8808843493461609, + "learning_rate": 1.543e-05, + "num_tokens": 312744.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.46, + "step": 460 + }, + { + "loss": 0.0632, + "grad_norm": 0.8605257868766785, + "learning_rate": 1.542e-05, + "num_tokens": 313768.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.461, + "step": 461 + }, + { + "loss": 0.0164, + "grad_norm": 3.112032890319824, + "learning_rate": 1.541e-05, + "num_tokens": 313950.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.462, + "step": 462 + }, + { + "loss": 0.0431, + "grad_norm": 0.8734879493713379, + "learning_rate": 1.54e-05, + "num_tokens": 314553.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.463, + "step": 463 + }, + { + "loss": 0.0443, + "grad_norm": 0.8178501129150391, + "learning_rate": 1.539e-05, + "num_tokens": 315156.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.464, + "step": 464 + }, + { + "loss": 0.0155, + "grad_norm": 2.3297200202941895, + "learning_rate": 1.5380000000000002e-05, + "num_tokens": 315338.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.465, + "step": 465 + }, + { + "loss": 0.0567, + "grad_norm": 1.0183790922164917, + "learning_rate": 1.537e-05, + "num_tokens": 316362.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.466, + "step": 466 + }, + { + "loss": 0.0133, + "grad_norm": 2.1442461013793945, + "learning_rate": 1.5360000000000002e-05, + "num_tokens": 316544.0, + "mean_token_accuracy": 1.0, + "epoch": 0.467, + "step": 467 + }, + { + "loss": 0.0718, + "grad_norm": 1.11445951461792, + "learning_rate": 1.535e-05, + "num_tokens": 317568.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.468, + "step": 468 + }, + { + "loss": 0.1012, + "grad_norm": 1.5906054973602295, + "learning_rate": 1.5340000000000002e-05, + "num_tokens": 318592.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 0.469, + "step": 469 + }, + { + "loss": 0.0745, + "grad_norm": 1.652694582939148, + "learning_rate": 1.533e-05, + "num_tokens": 319195.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.47, + "step": 470 + }, + { + "loss": 0.0903, + "grad_norm": 1.35775625705719, + "learning_rate": 1.5320000000000002e-05, + "num_tokens": 320219.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.471, + "step": 471 + }, + { + "loss": 0.099, + "grad_norm": 1.7678292989730835, + "learning_rate": 1.531e-05, + "num_tokens": 320822.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.472, + "step": 472 + }, + { + "loss": 0.0623, + "grad_norm": 0.8131306171417236, + "learning_rate": 1.5300000000000003e-05, + "num_tokens": 321425.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.473, + "step": 473 + }, + { + "loss": 0.1034, + "grad_norm": 1.3832954168319702, + "learning_rate": 1.529e-05, + "num_tokens": 322449.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.474, + "step": 474 + }, + { + "loss": 0.0643, + "grad_norm": 1.0007091760635376, + "learning_rate": 1.5280000000000003e-05, + "num_tokens": 323052.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.475, + "step": 475 + }, + { + "loss": 0.0874, + "grad_norm": 1.3062710762023926, + "learning_rate": 1.527e-05, + "num_tokens": 324076.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.476, + "step": 476 + }, + { + "loss": 0.0645, + "grad_norm": 1.046617865562439, + "learning_rate": 1.5260000000000003e-05, + "num_tokens": 324679.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.477, + "step": 477 + }, + { + "loss": 0.1331, + "grad_norm": 1.6525492668151855, + "learning_rate": 1.525e-05, + "num_tokens": 325703.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 0.478, + "step": 478 + }, + { + "loss": 0.0633, + "grad_norm": 1.07027006149292, + "learning_rate": 1.5240000000000001e-05, + "num_tokens": 326306.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.479, + "step": 479 + }, + { + "loss": 0.0543, + "grad_norm": 1.253555417060852, + "learning_rate": 1.523e-05, + "num_tokens": 326909.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.48, + "step": 480 + }, + { + "loss": 0.0689, + "grad_norm": 1.0007350444793701, + "learning_rate": 1.5220000000000002e-05, + "num_tokens": 327512.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.481, + "step": 481 + }, + { + "loss": 0.0201, + "grad_norm": 3.1733195781707764, + "learning_rate": 1.521e-05, + "num_tokens": 327694.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.482, + "step": 482 + }, + { + "loss": 0.0683, + "grad_norm": 1.078524112701416, + "learning_rate": 1.5200000000000002e-05, + "num_tokens": 328718.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.483, + "step": 483 + }, + { + "loss": 0.0444, + "grad_norm": 0.8199536800384521, + "learning_rate": 1.519e-05, + "num_tokens": 329321.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.484, + "step": 484 + }, + { + "loss": 0.0629, + "grad_norm": 1.2054758071899414, + "learning_rate": 1.5180000000000002e-05, + "num_tokens": 329924.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.485, + "step": 485 + }, + { + "loss": 0.0119, + "grad_norm": 1.753531575202942, + "learning_rate": 1.517e-05, + "num_tokens": 330106.0, + "mean_token_accuracy": 1.0, + "epoch": 0.486, + "step": 486 + }, + { + "loss": 0.0844, + "grad_norm": 1.1255303621292114, + "learning_rate": 1.516e-05, + "num_tokens": 331130.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.487, + "step": 487 + }, + { + "loss": 0.0856, + "grad_norm": 1.283798098564148, + "learning_rate": 1.515e-05, + "num_tokens": 332154.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.488, + "step": 488 + }, + { + "loss": 0.0631, + "grad_norm": 1.0470834970474243, + "learning_rate": 1.514e-05, + "num_tokens": 332757.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.489, + "step": 489 + }, + { + "loss": 0.0088, + "grad_norm": 1.2209492921829224, + "learning_rate": 1.513e-05, + "num_tokens": 332939.0, + "mean_token_accuracy": 1.0, + "epoch": 0.49, + "step": 490 + }, + { + "loss": 0.0523, + "grad_norm": 1.4202543497085571, + "learning_rate": 1.5120000000000001e-05, + "num_tokens": 333542.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.491, + "step": 491 + }, + { + "loss": 0.0075, + "grad_norm": 0.800220787525177, + "learning_rate": 1.5110000000000001e-05, + "num_tokens": 333724.0, + "mean_token_accuracy": 1.0, + "epoch": 0.492, + "step": 492 + }, + { + "loss": 0.0701, + "grad_norm": 2.3125245571136475, + "learning_rate": 1.5100000000000001e-05, + "num_tokens": 334327.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.493, + "step": 493 + }, + { + "loss": 0.0641, + "grad_norm": 1.2975730895996094, + "learning_rate": 1.509e-05, + "num_tokens": 334930.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.494, + "step": 494 + }, + { + "loss": 0.1182, + "grad_norm": 1.777302622795105, + "learning_rate": 1.5080000000000001e-05, + "num_tokens": 335533.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.495, + "step": 495 + }, + { + "loss": 0.0997, + "grad_norm": 1.2831844091415405, + "learning_rate": 1.507e-05, + "num_tokens": 336557.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.496, + "step": 496 + }, + { + "loss": 0.009, + "grad_norm": 2.3041038513183594, + "learning_rate": 1.5060000000000001e-05, + "num_tokens": 336739.0, + "mean_token_accuracy": 1.0, + "epoch": 0.497, + "step": 497 + }, + { + "loss": 0.1022, + "grad_norm": 2.2915868759155273, + "learning_rate": 1.505e-05, + "num_tokens": 337342.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.498, + "step": 498 + }, + { + "loss": 0.0091, + "grad_norm": 2.4227917194366455, + "learning_rate": 1.5040000000000002e-05, + "num_tokens": 337524.0, + "mean_token_accuracy": 1.0, + "epoch": 0.499, + "step": 499 + }, + { + "loss": 0.0741, + "grad_norm": 1.2912752628326416, + "learning_rate": 1.503e-05, + "num_tokens": 338548.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.5, + "step": 500 + }, + { + "loss": 0.0789, + "grad_norm": 1.418357491493225, + "learning_rate": 1.5020000000000002e-05, + "num_tokens": 339572.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.501, + "step": 501 + }, + { + "loss": 0.0437, + "grad_norm": 1.0824663639068604, + "learning_rate": 1.501e-05, + "num_tokens": 340175.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.502, + "step": 502 + }, + { + "loss": 0.0091, + "grad_norm": 2.0608322620391846, + "learning_rate": 1.5000000000000002e-05, + "num_tokens": 340357.0, + "mean_token_accuracy": 1.0, + "epoch": 0.503, + "step": 503 + }, + { + "loss": 0.008, + "grad_norm": 1.446424126625061, + "learning_rate": 1.4990000000000002e-05, + "num_tokens": 340539.0, + "mean_token_accuracy": 1.0, + "epoch": 0.504, + "step": 504 + }, + { + "loss": 0.0639, + "grad_norm": 1.2623666524887085, + "learning_rate": 1.498e-05, + "num_tokens": 341142.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.505, + "step": 505 + }, + { + "loss": 0.06, + "grad_norm": 0.982926607131958, + "learning_rate": 1.4970000000000002e-05, + "num_tokens": 342166.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.506, + "step": 506 + }, + { + "loss": 0.0553, + "grad_norm": 1.1177573204040527, + "learning_rate": 1.496e-05, + "num_tokens": 342769.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.507, + "step": 507 + }, + { + "loss": 0.0573, + "grad_norm": 1.266147494316101, + "learning_rate": 1.4950000000000003e-05, + "num_tokens": 343372.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.508, + "step": 508 + }, + { + "loss": 0.0632, + "grad_norm": 1.0854604244232178, + "learning_rate": 1.4940000000000001e-05, + "num_tokens": 343975.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.509, + "step": 509 + }, + { + "loss": 0.0569, + "grad_norm": 1.2042014598846436, + "learning_rate": 1.4930000000000003e-05, + "num_tokens": 344578.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.51, + "step": 510 + }, + { + "loss": 0.0755, + "grad_norm": 1.318413496017456, + "learning_rate": 1.4920000000000001e-05, + "num_tokens": 345602.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.511, + "step": 511 + }, + { + "loss": 0.096, + "grad_norm": 2.037118434906006, + "learning_rate": 1.4910000000000003e-05, + "num_tokens": 346205.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.512, + "step": 512 + }, + { + "loss": 0.0692, + "grad_norm": 1.2327139377593994, + "learning_rate": 1.4900000000000001e-05, + "num_tokens": 347229.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.513, + "step": 513 + }, + { + "loss": 0.0597, + "grad_norm": 1.2249183654785156, + "learning_rate": 1.4890000000000001e-05, + "num_tokens": 347832.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.514, + "step": 514 + }, + { + "loss": 0.0215, + "grad_norm": 3.7084152698516846, + "learning_rate": 1.4880000000000002e-05, + "num_tokens": 348014.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.515, + "step": 515 + }, + { + "loss": 0.0226, + "grad_norm": 3.6710031032562256, + "learning_rate": 1.4870000000000002e-05, + "num_tokens": 348196.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.516, + "step": 516 + }, + { + "loss": 0.0447, + "grad_norm": 1.1309056282043457, + "learning_rate": 1.4860000000000002e-05, + "num_tokens": 348799.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.517, + "step": 517 + }, + { + "loss": 0.0675, + "grad_norm": 0.7269265651702881, + "learning_rate": 1.4850000000000002e-05, + "num_tokens": 349823.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.518, + "step": 518 + }, + { + "loss": 0.0681, + "grad_norm": 0.942974865436554, + "learning_rate": 1.4840000000000002e-05, + "num_tokens": 350847.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.519, + "step": 519 + }, + { + "loss": 0.0596, + "grad_norm": 1.1206049919128418, + "learning_rate": 1.4830000000000002e-05, + "num_tokens": 351450.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.52, + "step": 520 + }, + { + "loss": 0.0626, + "grad_norm": 0.8903636336326599, + "learning_rate": 1.482e-05, + "num_tokens": 352474.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.521, + "step": 521 + }, + { + "loss": 0.0456, + "grad_norm": 1.0571587085723877, + "learning_rate": 1.4810000000000002e-05, + "num_tokens": 353077.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.522, + "step": 522 + }, + { + "loss": 0.0579, + "grad_norm": 0.832482635974884, + "learning_rate": 1.48e-05, + "num_tokens": 354101.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.523, + "step": 523 + }, + { + "loss": 0.0552, + "grad_norm": 1.0173414945602417, + "learning_rate": 1.4790000000000002e-05, + "num_tokens": 355125.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.524, + "step": 524 + }, + { + "loss": 0.0765, + "grad_norm": 1.0486934185028076, + "learning_rate": 1.478e-05, + "num_tokens": 356149.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.525, + "step": 525 + }, + { + "loss": 0.0554, + "grad_norm": 1.1522009372711182, + "learning_rate": 1.4770000000000003e-05, + "num_tokens": 356752.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.526, + "step": 526 + }, + { + "loss": 0.0435, + "grad_norm": 0.9237290024757385, + "learning_rate": 1.4760000000000001e-05, + "num_tokens": 357355.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.527, + "step": 527 + }, + { + "loss": 0.0451, + "grad_norm": 0.900613009929657, + "learning_rate": 1.4750000000000003e-05, + "num_tokens": 357958.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.528, + "step": 528 + }, + { + "loss": 0.0694, + "grad_norm": 0.940955400466919, + "learning_rate": 1.4740000000000001e-05, + "num_tokens": 358982.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.529, + "step": 529 + }, + { + "loss": 0.0191, + "grad_norm": 3.003450870513916, + "learning_rate": 1.4730000000000001e-05, + "num_tokens": 359164.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.53, + "step": 530 + }, + { + "loss": 0.043, + "grad_norm": 1.1651326417922974, + "learning_rate": 1.4720000000000001e-05, + "num_tokens": 359767.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.531, + "step": 531 + }, + { + "loss": 0.0637, + "grad_norm": 1.031686544418335, + "learning_rate": 1.4710000000000001e-05, + "num_tokens": 360370.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.532, + "step": 532 + }, + { + "loss": 0.0661, + "grad_norm": 1.5867462158203125, + "learning_rate": 1.4700000000000002e-05, + "num_tokens": 360973.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.533, + "step": 533 + }, + { + "loss": 0.0123, + "grad_norm": 2.072788715362549, + "learning_rate": 1.4690000000000002e-05, + "num_tokens": 361155.0, + "mean_token_accuracy": 1.0, + "epoch": 0.534, + "step": 534 + }, + { + "loss": 0.073, + "grad_norm": 1.3591760396957397, + "learning_rate": 1.4680000000000002e-05, + "num_tokens": 362179.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.535, + "step": 535 + }, + { + "loss": 0.0509, + "grad_norm": 1.1638456583023071, + "learning_rate": 1.4670000000000002e-05, + "num_tokens": 363203.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.536, + "step": 536 + }, + { + "loss": 0.0944, + "grad_norm": 1.6999235153198242, + "learning_rate": 1.466e-05, + "num_tokens": 364227.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.537, + "step": 537 + }, + { + "loss": 0.0562, + "grad_norm": 1.4748142957687378, + "learning_rate": 1.4650000000000002e-05, + "num_tokens": 364830.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.538, + "step": 538 + }, + { + "loss": 0.124, + "grad_norm": 2.1115293502807617, + "learning_rate": 1.464e-05, + "num_tokens": 365854.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 0.539, + "step": 539 + }, + { + "loss": 0.0477, + "grad_norm": 1.6090505123138428, + "learning_rate": 1.4630000000000002e-05, + "num_tokens": 366457.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.54, + "step": 540 + }, + { + "loss": 0.0081, + "grad_norm": 1.1160129308700562, + "learning_rate": 1.462e-05, + "num_tokens": 366639.0, + "mean_token_accuracy": 1.0, + "epoch": 0.541, + "step": 541 + }, + { + "loss": 0.0709, + "grad_norm": 1.0318498611450195, + "learning_rate": 1.4610000000000002e-05, + "num_tokens": 367663.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.542, + "step": 542 + }, + { + "loss": 0.0394, + "grad_norm": 1.2405304908752441, + "learning_rate": 1.46e-05, + "num_tokens": 368266.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.543, + "step": 543 + }, + { + "loss": 0.0081, + "grad_norm": 1.2077956199645996, + "learning_rate": 1.4590000000000003e-05, + "num_tokens": 368448.0, + "mean_token_accuracy": 1.0, + "epoch": 0.544, + "step": 544 + }, + { + "loss": 0.0073, + "grad_norm": 1.0318228006362915, + "learning_rate": 1.4580000000000001e-05, + "num_tokens": 368630.0, + "mean_token_accuracy": 1.0, + "epoch": 0.545, + "step": 545 + }, + { + "loss": 0.0061, + "grad_norm": 0.6988610029220581, + "learning_rate": 1.4570000000000001e-05, + "num_tokens": 368812.0, + "mean_token_accuracy": 1.0, + "epoch": 0.546, + "step": 546 + }, + { + "loss": 0.0962, + "grad_norm": 1.2362191677093506, + "learning_rate": 1.4560000000000001e-05, + "num_tokens": 369836.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.547, + "step": 547 + }, + { + "loss": 0.0475, + "grad_norm": 1.1755952835083008, + "learning_rate": 1.4550000000000001e-05, + "num_tokens": 370439.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.548, + "step": 548 + }, + { + "loss": 0.0395, + "grad_norm": 1.067665934562683, + "learning_rate": 1.4540000000000001e-05, + "num_tokens": 371042.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.549, + "step": 549 + }, + { + "loss": 0.0697, + "grad_norm": 1.282993197441101, + "learning_rate": 1.4530000000000001e-05, + "num_tokens": 371645.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.55, + "step": 550 + }, + { + "loss": 0.0043, + "grad_norm": 0.33643096685409546, + "learning_rate": 1.4520000000000002e-05, + "num_tokens": 371827.0, + "mean_token_accuracy": 1.0, + "epoch": 0.551, + "step": 551 + }, + { + "loss": 0.0041, + "grad_norm": 0.32346561551094055, + "learning_rate": 1.4510000000000002e-05, + "num_tokens": 372009.0, + "mean_token_accuracy": 1.0, + "epoch": 0.552, + "step": 552 + }, + { + "loss": 0.0782, + "grad_norm": 1.3768310546875, + "learning_rate": 1.45e-05, + "num_tokens": 372612.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.553, + "step": 553 + }, + { + "loss": 0.0631, + "grad_norm": 0.9446674585342407, + "learning_rate": 1.4490000000000002e-05, + "num_tokens": 373636.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.554, + "step": 554 + }, + { + "loss": 0.0666, + "grad_norm": 1.2715314626693726, + "learning_rate": 1.448e-05, + "num_tokens": 374660.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.555, + "step": 555 + }, + { + "loss": 0.0663, + "grad_norm": 1.154998540878296, + "learning_rate": 1.4470000000000002e-05, + "num_tokens": 375684.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.556, + "step": 556 + }, + { + "loss": 0.0511, + "grad_norm": 0.8647584915161133, + "learning_rate": 1.446e-05, + "num_tokens": 376708.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.557, + "step": 557 + }, + { + "loss": 0.0487, + "grad_norm": 0.9593469500541687, + "learning_rate": 1.4450000000000002e-05, + "num_tokens": 377311.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.558, + "step": 558 + }, + { + "loss": 0.0566, + "grad_norm": 0.6962567567825317, + "learning_rate": 1.444e-05, + "num_tokens": 378335.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.559, + "step": 559 + }, + { + "loss": 0.0581, + "grad_norm": 0.9556426405906677, + "learning_rate": 1.4430000000000002e-05, + "num_tokens": 379359.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.56, + "step": 560 + }, + { + "loss": 0.0531, + "grad_norm": 0.9037861227989197, + "learning_rate": 1.4420000000000001e-05, + "num_tokens": 380383.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.561, + "step": 561 + }, + { + "loss": 0.0459, + "grad_norm": 1.1316790580749512, + "learning_rate": 1.4410000000000001e-05, + "num_tokens": 380986.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.562, + "step": 562 + }, + { + "loss": 0.0259, + "grad_norm": 3.605470657348633, + "learning_rate": 1.4400000000000001e-05, + "num_tokens": 381168.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.563, + "step": 563 + }, + { + "loss": 0.064, + "grad_norm": 0.8718283176422119, + "learning_rate": 1.4390000000000001e-05, + "num_tokens": 382192.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.564, + "step": 564 + }, + { + "loss": 0.0807, + "grad_norm": 0.9344546794891357, + "learning_rate": 1.4380000000000001e-05, + "num_tokens": 383216.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.565, + "step": 565 + }, + { + "loss": 0.0655, + "grad_norm": 1.1615803241729736, + "learning_rate": 1.4370000000000001e-05, + "num_tokens": 384240.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.566, + "step": 566 + }, + { + "loss": 0.04, + "grad_norm": 0.9558491706848145, + "learning_rate": 1.4360000000000001e-05, + "num_tokens": 384843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.567, + "step": 567 + }, + { + "loss": 0.0296, + "grad_norm": 3.508678674697876, + "learning_rate": 1.4350000000000002e-05, + "num_tokens": 385025.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.568, + "step": 568 + }, + { + "loss": 0.0599, + "grad_norm": 1.2113062143325806, + "learning_rate": 1.434e-05, + "num_tokens": 386049.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.569, + "step": 569 + }, + { + "loss": 0.0531, + "grad_norm": 1.2263380289077759, + "learning_rate": 1.4330000000000002e-05, + "num_tokens": 386652.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.57, + "step": 570 + }, + { + "loss": 0.0471, + "grad_norm": 1.1156768798828125, + "learning_rate": 1.432e-05, + "num_tokens": 387255.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.571, + "step": 571 + }, + { + "loss": 0.0418, + "grad_norm": 0.7835745215415955, + "learning_rate": 1.4310000000000002e-05, + "num_tokens": 388279.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 0.572, + "step": 572 + }, + { + "loss": 0.0449, + "grad_norm": 1.0317991971969604, + "learning_rate": 1.43e-05, + "num_tokens": 388882.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.573, + "step": 573 + }, + { + "loss": 0.0373, + "grad_norm": 0.9112545847892761, + "learning_rate": 1.4290000000000002e-05, + "num_tokens": 389485.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.574, + "step": 574 + }, + { + "loss": 0.0144, + "grad_norm": 2.238581657409668, + "learning_rate": 1.428e-05, + "num_tokens": 389667.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.575, + "step": 575 + }, + { + "loss": 0.1003, + "grad_norm": 1.459584355354309, + "learning_rate": 1.4270000000000002e-05, + "num_tokens": 390270.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.576, + "step": 576 + }, + { + "loss": 0.0096, + "grad_norm": 1.6822608709335327, + "learning_rate": 1.426e-05, + "num_tokens": 390452.0, + "mean_token_accuracy": 1.0, + "epoch": 0.577, + "step": 577 + }, + { + "loss": 0.0538, + "grad_norm": 0.8980907797813416, + "learning_rate": 1.425e-05, + "num_tokens": 391476.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.578, + "step": 578 + }, + { + "loss": 0.0388, + "grad_norm": 1.2530609369277954, + "learning_rate": 1.4240000000000001e-05, + "num_tokens": 392079.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.579, + "step": 579 + }, + { + "loss": 0.058, + "grad_norm": 1.6785279512405396, + "learning_rate": 1.4230000000000001e-05, + "num_tokens": 393103.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.58, + "step": 580 + }, + { + "loss": 0.0455, + "grad_norm": 0.9678398966789246, + "learning_rate": 1.4220000000000001e-05, + "num_tokens": 393706.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.581, + "step": 581 + }, + { + "loss": 0.0053, + "grad_norm": 0.6296008229255676, + "learning_rate": 1.4210000000000001e-05, + "num_tokens": 393888.0, + "mean_token_accuracy": 1.0, + "epoch": 0.582, + "step": 582 + }, + { + "loss": 0.0345, + "grad_norm": 0.8543047308921814, + "learning_rate": 1.4200000000000001e-05, + "num_tokens": 394491.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.583, + "step": 583 + }, + { + "loss": 0.0717, + "grad_norm": 1.287461519241333, + "learning_rate": 1.4190000000000001e-05, + "num_tokens": 395515.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.584, + "step": 584 + }, + { + "loss": 0.0365, + "grad_norm": 1.0201870203018188, + "learning_rate": 1.418e-05, + "num_tokens": 396118.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.585, + "step": 585 + }, + { + "loss": 0.0051, + "grad_norm": 0.67372727394104, + "learning_rate": 1.4170000000000002e-05, + "num_tokens": 396300.0, + "mean_token_accuracy": 1.0, + "epoch": 0.586, + "step": 586 + }, + { + "loss": 0.0988, + "grad_norm": 1.6359323263168335, + "learning_rate": 1.416e-05, + "num_tokens": 396903.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.587, + "step": 587 + }, + { + "loss": 0.0581, + "grad_norm": 0.944645345211029, + "learning_rate": 1.4150000000000002e-05, + "num_tokens": 397506.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.588, + "step": 588 + }, + { + "loss": 0.0428, + "grad_norm": 0.9059939384460449, + "learning_rate": 1.414e-05, + "num_tokens": 398109.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.589, + "step": 589 + }, + { + "loss": 0.0622, + "grad_norm": 0.939890444278717, + "learning_rate": 1.4130000000000002e-05, + "num_tokens": 399133.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.59, + "step": 590 + }, + { + "loss": 0.0621, + "grad_norm": 0.8959317207336426, + "learning_rate": 1.412e-05, + "num_tokens": 399736.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.591, + "step": 591 + }, + { + "loss": 0.0554, + "grad_norm": 1.2328743934631348, + "learning_rate": 1.4110000000000002e-05, + "num_tokens": 400339.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.592, + "step": 592 + }, + { + "loss": 0.0447, + "grad_norm": 0.7593986988067627, + "learning_rate": 1.41e-05, + "num_tokens": 401363.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.593, + "step": 593 + }, + { + "loss": 0.0487, + "grad_norm": 0.7263651490211487, + "learning_rate": 1.409e-05, + "num_tokens": 402387.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.594, + "step": 594 + }, + { + "loss": 0.0604, + "grad_norm": 1.2630764245986938, + "learning_rate": 1.408e-05, + "num_tokens": 402990.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.595, + "step": 595 + }, + { + "loss": 0.0386, + "grad_norm": 1.0648528337478638, + "learning_rate": 1.407e-05, + "num_tokens": 403593.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.596, + "step": 596 + }, + { + "loss": 0.0573, + "grad_norm": 0.8750402331352234, + "learning_rate": 1.4060000000000001e-05, + "num_tokens": 404617.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.597, + "step": 597 + }, + { + "loss": 0.0688, + "grad_norm": 0.9205127358436584, + "learning_rate": 1.4050000000000001e-05, + "num_tokens": 405641.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.598, + "step": 598 + }, + { + "loss": 0.0556, + "grad_norm": 0.8728544116020203, + "learning_rate": 1.4040000000000001e-05, + "num_tokens": 406665.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.599, + "step": 599 + }, + { + "loss": 0.0547, + "grad_norm": 1.0766440629959106, + "learning_rate": 1.4030000000000001e-05, + "num_tokens": 407268.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.6, + "step": 600 + }, + { + "loss": 0.3259, + "grad_norm": 6.388917446136475, + "learning_rate": 1.402e-05, + "num_tokens": 408292.0, + "mean_token_accuracy": 0.9207436442375183, + "epoch": 0.601, + "step": 601 + }, + { + "loss": 0.0617, + "grad_norm": 1.350803256034851, + "learning_rate": 1.4010000000000001e-05, + "num_tokens": 408895.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.602, + "step": 602 + }, + { + "loss": 0.0262, + "grad_norm": 4.706890106201172, + "learning_rate": 1.4e-05, + "num_tokens": 409077.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.603, + "step": 603 + }, + { + "loss": 0.0864, + "grad_norm": 1.3663084506988525, + "learning_rate": 1.3990000000000002e-05, + "num_tokens": 410101.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.604, + "step": 604 + }, + { + "loss": 0.0952, + "grad_norm": 1.8354886770248413, + "learning_rate": 1.398e-05, + "num_tokens": 410704.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.605, + "step": 605 + }, + { + "loss": 0.0815, + "grad_norm": 1.1599925756454468, + "learning_rate": 1.3970000000000002e-05, + "num_tokens": 411728.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.606, + "step": 606 + }, + { + "loss": 0.0123, + "grad_norm": 2.261835813522339, + "learning_rate": 1.396e-05, + "num_tokens": 411910.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.607, + "step": 607 + }, + { + "loss": 0.0742, + "grad_norm": 1.4766002893447876, + "learning_rate": 1.3950000000000002e-05, + "num_tokens": 412934.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.608, + "step": 608 + }, + { + "loss": 0.0719, + "grad_norm": 1.077452540397644, + "learning_rate": 1.394e-05, + "num_tokens": 413958.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.609, + "step": 609 + }, + { + "loss": 0.0626, + "grad_norm": 1.2010332345962524, + "learning_rate": 1.393e-05, + "num_tokens": 414561.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.61, + "step": 610 + }, + { + "loss": 0.0482, + "grad_norm": 1.1365265846252441, + "learning_rate": 1.392e-05, + "num_tokens": 415164.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.611, + "step": 611 + }, + { + "loss": 0.0774, + "grad_norm": 1.2080539464950562, + "learning_rate": 1.391e-05, + "num_tokens": 416188.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 0.612, + "step": 612 + }, + { + "loss": 0.0546, + "grad_norm": 0.9698471426963806, + "learning_rate": 1.39e-05, + "num_tokens": 416791.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.613, + "step": 613 + }, + { + "loss": 0.06, + "grad_norm": 1.1115221977233887, + "learning_rate": 1.389e-05, + "num_tokens": 417394.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.614, + "step": 614 + }, + { + "loss": 0.0072, + "grad_norm": 1.051293969154358, + "learning_rate": 1.3880000000000001e-05, + "num_tokens": 417576.0, + "mean_token_accuracy": 1.0, + "epoch": 0.615, + "step": 615 + }, + { + "loss": 0.0457, + "grad_norm": 1.0508517026901245, + "learning_rate": 1.3870000000000001e-05, + "num_tokens": 418600.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.616, + "step": 616 + }, + { + "loss": 0.0649, + "grad_norm": 1.458174467086792, + "learning_rate": 1.386e-05, + "num_tokens": 419203.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.617, + "step": 617 + }, + { + "loss": 0.0598, + "grad_norm": 1.3368812799453735, + "learning_rate": 1.3850000000000001e-05, + "num_tokens": 420227.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.618, + "step": 618 + }, + { + "loss": 0.0558, + "grad_norm": 1.0999784469604492, + "learning_rate": 1.384e-05, + "num_tokens": 420830.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.619, + "step": 619 + }, + { + "loss": 0.0835, + "grad_norm": 1.2071765661239624, + "learning_rate": 1.3830000000000001e-05, + "num_tokens": 421854.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.62, + "step": 620 + }, + { + "loss": 0.048, + "grad_norm": 1.2251503467559814, + "learning_rate": 1.382e-05, + "num_tokens": 422457.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.621, + "step": 621 + }, + { + "loss": 0.0497, + "grad_norm": 1.2595113515853882, + "learning_rate": 1.3810000000000002e-05, + "num_tokens": 423481.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.622, + "step": 622 + }, + { + "loss": 0.0647, + "grad_norm": 1.2705106735229492, + "learning_rate": 1.38e-05, + "num_tokens": 424505.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.623, + "step": 623 + }, + { + "loss": 0.0577, + "grad_norm": 1.1510343551635742, + "learning_rate": 1.3790000000000002e-05, + "num_tokens": 425529.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.624, + "step": 624 + }, + { + "loss": 0.0659, + "grad_norm": 1.2172942161560059, + "learning_rate": 1.378e-05, + "num_tokens": 426132.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.625, + "step": 625 + }, + { + "loss": 0.0494, + "grad_norm": 1.2537918090820312, + "learning_rate": 1.377e-05, + "num_tokens": 426735.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.626, + "step": 626 + }, + { + "loss": 0.057, + "grad_norm": 1.2958061695098877, + "learning_rate": 1.376e-05, + "num_tokens": 427338.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.627, + "step": 627 + }, + { + "loss": 0.0547, + "grad_norm": 0.6661484837532043, + "learning_rate": 1.375e-05, + "num_tokens": 428362.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.628, + "step": 628 + }, + { + "loss": 0.0897, + "grad_norm": 1.4734290838241577, + "learning_rate": 1.3740000000000002e-05, + "num_tokens": 428965.0, + "mean_token_accuracy": 0.961730420589447, + "epoch": 0.629, + "step": 629 + }, + { + "loss": 0.0519, + "grad_norm": 0.7639888525009155, + "learning_rate": 1.373e-05, + "num_tokens": 429989.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.63, + "step": 630 + }, + { + "loss": 0.0659, + "grad_norm": 1.1685161590576172, + "learning_rate": 1.3720000000000002e-05, + "num_tokens": 431013.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.631, + "step": 631 + }, + { + "loss": 0.0604, + "grad_norm": 0.9931361079216003, + "learning_rate": 1.3710000000000001e-05, + "num_tokens": 432037.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.632, + "step": 632 + }, + { + "loss": 0.0218, + "grad_norm": 2.6311545372009277, + "learning_rate": 1.3700000000000003e-05, + "num_tokens": 432219.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.633, + "step": 633 + }, + { + "loss": 0.0199, + "grad_norm": 2.497168779373169, + "learning_rate": 1.3690000000000001e-05, + "num_tokens": 432401.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.634, + "step": 634 + }, + { + "loss": 0.0633, + "grad_norm": 0.8656933307647705, + "learning_rate": 1.3680000000000003e-05, + "num_tokens": 433425.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.635, + "step": 635 + }, + { + "loss": 0.0775, + "grad_norm": 1.6720925569534302, + "learning_rate": 1.3670000000000001e-05, + "num_tokens": 434028.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.636, + "step": 636 + }, + { + "loss": 0.0701, + "grad_norm": 1.2704541683197021, + "learning_rate": 1.3660000000000001e-05, + "num_tokens": 434631.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.637, + "step": 637 + }, + { + "loss": 0.0108, + "grad_norm": 1.5020633935928345, + "learning_rate": 1.3650000000000001e-05, + "num_tokens": 434813.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.638, + "step": 638 + }, + { + "loss": 0.0404, + "grad_norm": 0.7698756456375122, + "learning_rate": 1.3640000000000002e-05, + "num_tokens": 435416.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.639, + "step": 639 + }, + { + "loss": 0.008, + "grad_norm": 1.2060641050338745, + "learning_rate": 1.3630000000000002e-05, + "num_tokens": 435598.0, + "mean_token_accuracy": 1.0, + "epoch": 0.64, + "step": 640 + }, + { + "loss": 0.0747, + "grad_norm": 1.159375786781311, + "learning_rate": 1.3620000000000002e-05, + "num_tokens": 436622.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.641, + "step": 641 + }, + { + "loss": 0.0054, + "grad_norm": 0.802221417427063, + "learning_rate": 1.3610000000000002e-05, + "num_tokens": 436804.0, + "mean_token_accuracy": 1.0, + "epoch": 0.642, + "step": 642 + }, + { + "loss": 0.0631, + "grad_norm": 1.0704505443572998, + "learning_rate": 1.3600000000000002e-05, + "num_tokens": 437407.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.643, + "step": 643 + }, + { + "loss": 0.0555, + "grad_norm": 0.9658818244934082, + "learning_rate": 1.359e-05, + "num_tokens": 438431.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.644, + "step": 644 + }, + { + "loss": 0.0831, + "grad_norm": 1.4335317611694336, + "learning_rate": 1.3580000000000002e-05, + "num_tokens": 439455.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.645, + "step": 645 + }, + { + "loss": 0.0387, + "grad_norm": 0.9613522291183472, + "learning_rate": 1.357e-05, + "num_tokens": 440058.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.646, + "step": 646 + }, + { + "loss": 0.0034, + "grad_norm": 0.3476230204105377, + "learning_rate": 1.3560000000000002e-05, + "num_tokens": 440240.0, + "mean_token_accuracy": 1.0, + "epoch": 0.647, + "step": 647 + }, + { + "loss": 0.0446, + "grad_norm": 1.1713249683380127, + "learning_rate": 1.355e-05, + "num_tokens": 440843.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.648, + "step": 648 + }, + { + "loss": 0.0469, + "grad_norm": 1.0446158647537231, + "learning_rate": 1.3540000000000003e-05, + "num_tokens": 441446.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.649, + "step": 649 + }, + { + "loss": 0.0754, + "grad_norm": 1.0586427450180054, + "learning_rate": 1.3530000000000001e-05, + "num_tokens": 442470.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.65, + "step": 650 + }, + { + "loss": 0.0681, + "grad_norm": 1.0640681982040405, + "learning_rate": 1.3520000000000003e-05, + "num_tokens": 443494.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.651, + "step": 651 + }, + { + "loss": 0.0387, + "grad_norm": 0.8930626511573792, + "learning_rate": 1.3510000000000001e-05, + "num_tokens": 444097.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.652, + "step": 652 + }, + { + "loss": 0.0482, + "grad_norm": 0.9406304955482483, + "learning_rate": 1.3500000000000001e-05, + "num_tokens": 445121.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.653, + "step": 653 + }, + { + "loss": 0.052, + "grad_norm": 0.8975579738616943, + "learning_rate": 1.3490000000000001e-05, + "num_tokens": 445724.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.654, + "step": 654 + }, + { + "loss": 0.0516, + "grad_norm": 1.0024687051773071, + "learning_rate": 1.3480000000000001e-05, + "num_tokens": 446327.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.655, + "step": 655 + }, + { + "loss": 0.0607, + "grad_norm": 1.477307677268982, + "learning_rate": 1.3470000000000001e-05, + "num_tokens": 446930.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.656, + "step": 656 + }, + { + "loss": 0.0577, + "grad_norm": 0.7049059271812439, + "learning_rate": 1.3460000000000002e-05, + "num_tokens": 447954.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.657, + "step": 657 + }, + { + "loss": 0.0554, + "grad_norm": 1.0566304922103882, + "learning_rate": 1.3450000000000002e-05, + "num_tokens": 448557.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.658, + "step": 658 + }, + { + "loss": 0.0603, + "grad_norm": 1.3350647687911987, + "learning_rate": 1.3440000000000002e-05, + "num_tokens": 449160.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.659, + "step": 659 + }, + { + "loss": 0.055, + "grad_norm": 0.9154465198516846, + "learning_rate": 1.343e-05, + "num_tokens": 450184.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.66, + "step": 660 + }, + { + "loss": 0.0628, + "grad_norm": 1.230380654335022, + "learning_rate": 1.3420000000000002e-05, + "num_tokens": 450787.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.661, + "step": 661 + }, + { + "loss": 0.0516, + "grad_norm": 0.9731350541114807, + "learning_rate": 1.341e-05, + "num_tokens": 451390.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.662, + "step": 662 + }, + { + "loss": 0.0471, + "grad_norm": 0.7833011746406555, + "learning_rate": 1.3400000000000002e-05, + "num_tokens": 452414.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.663, + "step": 663 + }, + { + "loss": 0.0436, + "grad_norm": 0.7588993906974792, + "learning_rate": 1.339e-05, + "num_tokens": 453438.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 0.664, + "step": 664 + }, + { + "loss": 0.0916, + "grad_norm": 1.6703461408615112, + "learning_rate": 1.3380000000000002e-05, + "num_tokens": 454041.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.665, + "step": 665 + }, + { + "loss": 0.0492, + "grad_norm": 0.6929834485054016, + "learning_rate": 1.337e-05, + "num_tokens": 455065.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.666, + "step": 666 + }, + { + "loss": 0.0465, + "grad_norm": 0.888302743434906, + "learning_rate": 1.3360000000000003e-05, + "num_tokens": 455668.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.667, + "step": 667 + }, + { + "loss": 0.296, + "grad_norm": 5.514519214630127, + "learning_rate": 1.3350000000000001e-05, + "num_tokens": 456271.0, + "mean_token_accuracy": 0.9317803382873535, + "epoch": 0.668, + "step": 668 + }, + { + "loss": 0.0207, + "grad_norm": 2.874188184738159, + "learning_rate": 1.3340000000000001e-05, + "num_tokens": 456453.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.669, + "step": 669 + }, + { + "loss": 0.056, + "grad_norm": 0.6424664855003357, + "learning_rate": 1.3330000000000001e-05, + "num_tokens": 457477.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.67, + "step": 670 + }, + { + "loss": 0.0577, + "grad_norm": 0.8440362811088562, + "learning_rate": 1.3320000000000001e-05, + "num_tokens": 458501.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.671, + "step": 671 + }, + { + "loss": 0.0584, + "grad_norm": 0.8988680243492126, + "learning_rate": 1.3310000000000001e-05, + "num_tokens": 459104.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.672, + "step": 672 + }, + { + "loss": 0.0802, + "grad_norm": 1.072707176208496, + "learning_rate": 1.3300000000000001e-05, + "num_tokens": 460128.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.673, + "step": 673 + }, + { + "loss": 0.0655, + "grad_norm": 1.1271072626113892, + "learning_rate": 1.3290000000000002e-05, + "num_tokens": 461152.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.674, + "step": 674 + }, + { + "loss": 0.0129, + "grad_norm": 1.9966233968734741, + "learning_rate": 1.3280000000000002e-05, + "num_tokens": 461334.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.675, + "step": 675 + }, + { + "loss": 0.0124, + "grad_norm": 1.8515944480895996, + "learning_rate": 1.327e-05, + "num_tokens": 461516.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.676, + "step": 676 + }, + { + "loss": 0.0545, + "grad_norm": 0.946265697479248, + "learning_rate": 1.3260000000000002e-05, + "num_tokens": 462540.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.677, + "step": 677 + }, + { + "loss": 0.0484, + "grad_norm": 1.0001753568649292, + "learning_rate": 1.325e-05, + "num_tokens": 463143.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.678, + "step": 678 + }, + { + "loss": 0.0078, + "grad_norm": 1.164751648902893, + "learning_rate": 1.3240000000000002e-05, + "num_tokens": 463325.0, + "mean_token_accuracy": 1.0, + "epoch": 0.679, + "step": 679 + }, + { + "loss": 0.0725, + "grad_norm": 1.3081203699111938, + "learning_rate": 1.323e-05, + "num_tokens": 464349.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.68, + "step": 680 + }, + { + "loss": 0.0404, + "grad_norm": 0.8555117845535278, + "learning_rate": 1.3220000000000002e-05, + "num_tokens": 464952.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.681, + "step": 681 + }, + { + "loss": 0.0046, + "grad_norm": 0.5416426062583923, + "learning_rate": 1.321e-05, + "num_tokens": 465134.0, + "mean_token_accuracy": 1.0, + "epoch": 0.682, + "step": 682 + }, + { + "loss": 0.0576, + "grad_norm": 1.0527853965759277, + "learning_rate": 1.3200000000000002e-05, + "num_tokens": 466158.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.683, + "step": 683 + }, + { + "loss": 0.0564, + "grad_norm": 0.8705971837043762, + "learning_rate": 1.319e-05, + "num_tokens": 466761.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.684, + "step": 684 + }, + { + "loss": 0.0536, + "grad_norm": 1.1689633131027222, + "learning_rate": 1.3180000000000001e-05, + "num_tokens": 467364.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.685, + "step": 685 + }, + { + "loss": 0.0445, + "grad_norm": 1.2486073970794678, + "learning_rate": 1.3170000000000001e-05, + "num_tokens": 468388.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.686, + "step": 686 + }, + { + "loss": 0.0662, + "grad_norm": 1.1041734218597412, + "learning_rate": 1.3160000000000001e-05, + "num_tokens": 469412.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.687, + "step": 687 + }, + { + "loss": 0.0536, + "grad_norm": 0.8892203569412231, + "learning_rate": 1.3150000000000001e-05, + "num_tokens": 470015.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.688, + "step": 688 + }, + { + "loss": 0.072, + "grad_norm": 1.2102046012878418, + "learning_rate": 1.3140000000000001e-05, + "num_tokens": 471039.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.689, + "step": 689 + }, + { + "loss": 0.0814, + "grad_norm": 1.2888877391815186, + "learning_rate": 1.3130000000000001e-05, + "num_tokens": 471642.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.69, + "step": 690 + }, + { + "loss": 0.0795, + "grad_norm": 1.6404471397399902, + "learning_rate": 1.3120000000000001e-05, + "num_tokens": 472245.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.691, + "step": 691 + }, + { + "loss": 0.0651, + "grad_norm": 0.8605929613113403, + "learning_rate": 1.311e-05, + "num_tokens": 473269.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.692, + "step": 692 + }, + { + "loss": 0.2317, + "grad_norm": 4.306615352630615, + "learning_rate": 1.3100000000000002e-05, + "num_tokens": 473872.0, + "mean_token_accuracy": 0.9367720484733582, + "epoch": 0.693, + "step": 693 + }, + { + "loss": 0.0175, + "grad_norm": 4.539740085601807, + "learning_rate": 1.309e-05, + "num_tokens": 474054.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.694, + "step": 694 + }, + { + "loss": 0.0188, + "grad_norm": 4.633057594299316, + "learning_rate": 1.3080000000000002e-05, + "num_tokens": 474236.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.695, + "step": 695 + }, + { + "loss": 0.0829, + "grad_norm": 1.670581340789795, + "learning_rate": 1.307e-05, + "num_tokens": 474839.0, + "mean_token_accuracy": 0.9667221307754517, + "epoch": 0.696, + "step": 696 + }, + { + "loss": 0.033, + "grad_norm": 0.8580129742622375, + "learning_rate": 1.3060000000000002e-05, + "num_tokens": 475442.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.697, + "step": 697 + }, + { + "loss": 0.06, + "grad_norm": 0.9854735732078552, + "learning_rate": 1.305e-05, + "num_tokens": 476466.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.698, + "step": 698 + }, + { + "loss": 0.0623, + "grad_norm": 1.267706036567688, + "learning_rate": 1.3040000000000002e-05, + "num_tokens": 477069.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.699, + "step": 699 + }, + { + "loss": 0.0731, + "grad_norm": 1.2111179828643799, + "learning_rate": 1.303e-05, + "num_tokens": 477672.0, + "mean_token_accuracy": 0.9650582075119019, + "epoch": 0.7, + "step": 700 + }, + { + "loss": 0.0571, + "grad_norm": 0.7638604044914246, + "learning_rate": 1.302e-05, + "num_tokens": 478696.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.701, + "step": 701 + }, + { + "loss": 0.0524, + "grad_norm": 0.9293149709701538, + "learning_rate": 1.301e-05, + "num_tokens": 479299.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.702, + "step": 702 + }, + { + "loss": 0.0493, + "grad_norm": 0.7328387498855591, + "learning_rate": 1.3000000000000001e-05, + "num_tokens": 480323.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.703, + "step": 703 + }, + { + "loss": 0.0505, + "grad_norm": 0.7699645757675171, + "learning_rate": 1.2990000000000001e-05, + "num_tokens": 481347.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.704, + "step": 704 + }, + { + "loss": 0.0641, + "grad_norm": 0.9049856066703796, + "learning_rate": 1.2980000000000001e-05, + "num_tokens": 482371.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.705, + "step": 705 + }, + { + "loss": 0.0556, + "grad_norm": 0.9629088640213013, + "learning_rate": 1.2970000000000001e-05, + "num_tokens": 482974.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.706, + "step": 706 + }, + { + "loss": 0.0575, + "grad_norm": 0.9650252461433411, + "learning_rate": 1.2960000000000001e-05, + "num_tokens": 483577.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.707, + "step": 707 + }, + { + "loss": 0.0757, + "grad_norm": 0.934861421585083, + "learning_rate": 1.295e-05, + "num_tokens": 484601.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.708, + "step": 708 + }, + { + "loss": 0.055, + "grad_norm": 1.0304492712020874, + "learning_rate": 1.2940000000000001e-05, + "num_tokens": 485204.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.709, + "step": 709 + }, + { + "loss": 0.0472, + "grad_norm": 0.9187700748443604, + "learning_rate": 1.293e-05, + "num_tokens": 485807.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.71, + "step": 710 + }, + { + "loss": 0.0487, + "grad_norm": 0.7827608585357666, + "learning_rate": 1.2920000000000002e-05, + "num_tokens": 486410.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.711, + "step": 711 + }, + { + "loss": 0.0594, + "grad_norm": 0.8399698138237, + "learning_rate": 1.291e-05, + "num_tokens": 487434.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.712, + "step": 712 + }, + { + "loss": 0.0557, + "grad_norm": 1.0209884643554688, + "learning_rate": 1.2900000000000002e-05, + "num_tokens": 488458.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.713, + "step": 713 + }, + { + "loss": 0.0145, + "grad_norm": 2.2941842079162598, + "learning_rate": 1.289e-05, + "num_tokens": 488640.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.714, + "step": 714 + }, + { + "loss": 0.0603, + "grad_norm": 0.9182419776916504, + "learning_rate": 1.2880000000000002e-05, + "num_tokens": 489664.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.715, + "step": 715 + }, + { + "loss": 0.0141, + "grad_norm": 2.3380424976348877, + "learning_rate": 1.287e-05, + "num_tokens": 489846.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.716, + "step": 716 + }, + { + "loss": 0.0122, + "grad_norm": 2.0624377727508545, + "learning_rate": 1.286e-05, + "num_tokens": 490028.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.717, + "step": 717 + }, + { + "loss": 0.0518, + "grad_norm": 1.0140818357467651, + "learning_rate": 1.285e-05, + "num_tokens": 490631.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.718, + "step": 718 + }, + { + "loss": 0.059, + "grad_norm": 1.5269079208374023, + "learning_rate": 1.284e-05, + "num_tokens": 491234.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.719, + "step": 719 + }, + { + "loss": 0.0385, + "grad_norm": 0.9199709892272949, + "learning_rate": 1.283e-05, + "num_tokens": 491837.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.72, + "step": 720 + }, + { + "loss": 0.0346, + "grad_norm": 0.9498630166053772, + "learning_rate": 1.2820000000000001e-05, + "num_tokens": 492440.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.721, + "step": 721 + }, + { + "loss": 0.0387, + "grad_norm": 1.0423791408538818, + "learning_rate": 1.2810000000000001e-05, + "num_tokens": 493043.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.722, + "step": 722 + }, + { + "loss": 0.0561, + "grad_norm": 1.3060035705566406, + "learning_rate": 1.2800000000000001e-05, + "num_tokens": 493646.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.723, + "step": 723 + }, + { + "loss": 0.0598, + "grad_norm": 1.1314760446548462, + "learning_rate": 1.279e-05, + "num_tokens": 494249.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.724, + "step": 724 + }, + { + "loss": 0.0051, + "grad_norm": 0.840337872505188, + "learning_rate": 1.2780000000000001e-05, + "num_tokens": 494431.0, + "mean_token_accuracy": 1.0, + "epoch": 0.725, + "step": 725 + }, + { + "loss": 0.0049, + "grad_norm": 0.8124201893806458, + "learning_rate": 1.277e-05, + "num_tokens": 494613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.726, + "step": 726 + }, + { + "loss": 0.0388, + "grad_norm": 1.1167151927947998, + "learning_rate": 1.2760000000000001e-05, + "num_tokens": 495216.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.727, + "step": 727 + }, + { + "loss": 0.0436, + "grad_norm": 1.271494746208191, + "learning_rate": 1.275e-05, + "num_tokens": 495819.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.728, + "step": 728 + }, + { + "loss": 0.0375, + "grad_norm": 0.8926107883453369, + "learning_rate": 1.2740000000000002e-05, + "num_tokens": 496422.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.729, + "step": 729 + }, + { + "loss": 0.0036, + "grad_norm": 0.5271093249320984, + "learning_rate": 1.273e-05, + "num_tokens": 496604.0, + "mean_token_accuracy": 1.0, + "epoch": 0.73, + "step": 730 + }, + { + "loss": 0.0613, + "grad_norm": 1.239539623260498, + "learning_rate": 1.2720000000000002e-05, + "num_tokens": 497207.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.731, + "step": 731 + }, + { + "loss": 0.0566, + "grad_norm": 1.033392310142517, + "learning_rate": 1.271e-05, + "num_tokens": 498231.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.732, + "step": 732 + }, + { + "loss": 0.0562, + "grad_norm": 1.020779013633728, + "learning_rate": 1.27e-05, + "num_tokens": 498834.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.733, + "step": 733 + }, + { + "loss": 0.0391, + "grad_norm": 0.95565265417099, + "learning_rate": 1.269e-05, + "num_tokens": 499437.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.734, + "step": 734 + }, + { + "loss": 0.0617, + "grad_norm": 1.0239723920822144, + "learning_rate": 1.268e-05, + "num_tokens": 500461.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.735, + "step": 735 + }, + { + "loss": 0.0756, + "grad_norm": 1.4600635766983032, + "learning_rate": 1.267e-05, + "num_tokens": 501064.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.736, + "step": 736 + }, + { + "loss": 0.0351, + "grad_norm": 0.7788209319114685, + "learning_rate": 1.266e-05, + "num_tokens": 501667.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.737, + "step": 737 + }, + { + "loss": 0.0361, + "grad_norm": 0.8924766182899475, + "learning_rate": 1.2650000000000001e-05, + "num_tokens": 502270.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.738, + "step": 738 + }, + { + "loss": 0.0563, + "grad_norm": 0.8318547606468201, + "learning_rate": 1.2640000000000001e-05, + "num_tokens": 503294.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.739, + "step": 739 + }, + { + "loss": 0.0601, + "grad_norm": 0.7167434096336365, + "learning_rate": 1.263e-05, + "num_tokens": 504318.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.74, + "step": 740 + }, + { + "loss": 0.0716, + "grad_norm": 1.6360701322555542, + "learning_rate": 1.2620000000000001e-05, + "num_tokens": 504921.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.741, + "step": 741 + }, + { + "loss": 0.053, + "grad_norm": 0.8519343137741089, + "learning_rate": 1.261e-05, + "num_tokens": 505524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.742, + "step": 742 + }, + { + "loss": 0.0143, + "grad_norm": 2.3694989681243896, + "learning_rate": 1.2600000000000001e-05, + "num_tokens": 505706.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.743, + "step": 743 + }, + { + "loss": 0.0518, + "grad_norm": 0.7736840844154358, + "learning_rate": 1.259e-05, + "num_tokens": 506730.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.744, + "step": 744 + }, + { + "loss": 0.0136, + "grad_norm": 2.3100736141204834, + "learning_rate": 1.2580000000000002e-05, + "num_tokens": 506912.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.745, + "step": 745 + }, + { + "loss": 0.077, + "grad_norm": 1.0608011484146118, + "learning_rate": 1.257e-05, + "num_tokens": 507936.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.746, + "step": 746 + }, + { + "loss": 0.041, + "grad_norm": 0.8255691528320312, + "learning_rate": 1.2560000000000002e-05, + "num_tokens": 508539.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.747, + "step": 747 + }, + { + "loss": 0.0448, + "grad_norm": 1.0147794485092163, + "learning_rate": 1.255e-05, + "num_tokens": 509563.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.748, + "step": 748 + }, + { + "loss": 0.2396, + "grad_norm": 5.24788236618042, + "learning_rate": 1.254e-05, + "num_tokens": 510166.0, + "mean_token_accuracy": 0.9467554092407227, + "epoch": 0.749, + "step": 749 + }, + { + "loss": 0.06, + "grad_norm": 1.0772548913955688, + "learning_rate": 1.253e-05, + "num_tokens": 511190.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.75, + "step": 750 + }, + { + "loss": 0.0329, + "grad_norm": 0.748359739780426, + "learning_rate": 1.252e-05, + "num_tokens": 511793.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 0.751, + "step": 751 + }, + { + "loss": 0.0786, + "grad_norm": 1.5040301084518433, + "learning_rate": 1.251e-05, + "num_tokens": 512396.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.752, + "step": 752 + }, + { + "loss": 0.01, + "grad_norm": 1.7024807929992676, + "learning_rate": 1.25e-05, + "num_tokens": 512578.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.753, + "step": 753 + }, + { + "loss": 0.0564, + "grad_norm": 0.9046693444252014, + "learning_rate": 1.2490000000000002e-05, + "num_tokens": 513602.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.754, + "step": 754 + }, + { + "loss": 0.0626, + "grad_norm": 1.064791202545166, + "learning_rate": 1.248e-05, + "num_tokens": 514626.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.755, + "step": 755 + }, + { + "loss": 0.0562, + "grad_norm": 0.962312638759613, + "learning_rate": 1.2470000000000003e-05, + "num_tokens": 515650.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.756, + "step": 756 + }, + { + "loss": 0.037, + "grad_norm": 0.8026986122131348, + "learning_rate": 1.2460000000000001e-05, + "num_tokens": 516253.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.757, + "step": 757 + }, + { + "loss": 0.0639, + "grad_norm": 0.8239317536354065, + "learning_rate": 1.2450000000000003e-05, + "num_tokens": 517277.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.758, + "step": 758 + }, + { + "loss": 0.0553, + "grad_norm": 0.874905526638031, + "learning_rate": 1.2440000000000001e-05, + "num_tokens": 517880.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.759, + "step": 759 + }, + { + "loss": 0.0358, + "grad_norm": 0.9866107702255249, + "learning_rate": 1.2430000000000001e-05, + "num_tokens": 518483.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.76, + "step": 760 + }, + { + "loss": 0.0707, + "grad_norm": 1.2454264163970947, + "learning_rate": 1.2420000000000001e-05, + "num_tokens": 519507.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 0.761, + "step": 761 + }, + { + "loss": 0.0671, + "grad_norm": 0.9112080335617065, + "learning_rate": 1.2410000000000001e-05, + "num_tokens": 520531.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.762, + "step": 762 + }, + { + "loss": 0.0288, + "grad_norm": 0.7277910113334656, + "learning_rate": 1.2400000000000002e-05, + "num_tokens": 521134.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 0.763, + "step": 763 + }, + { + "loss": 0.0507, + "grad_norm": 0.6795754432678223, + "learning_rate": 1.2390000000000002e-05, + "num_tokens": 522158.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.764, + "step": 764 + }, + { + "loss": 0.0626, + "grad_norm": 1.8835927248001099, + "learning_rate": 1.2380000000000002e-05, + "num_tokens": 522761.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.765, + "step": 765 + }, + { + "loss": 0.0581, + "grad_norm": 0.9371005892753601, + "learning_rate": 1.2370000000000002e-05, + "num_tokens": 523364.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.766, + "step": 766 + }, + { + "loss": 0.0159, + "grad_norm": 2.4912757873535156, + "learning_rate": 1.236e-05, + "num_tokens": 523546.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.767, + "step": 767 + }, + { + "loss": 0.0716, + "grad_norm": 1.2988524436950684, + "learning_rate": 1.2350000000000002e-05, + "num_tokens": 524570.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.768, + "step": 768 + }, + { + "loss": 0.0147, + "grad_norm": 2.4790022373199463, + "learning_rate": 1.234e-05, + "num_tokens": 524752.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.769, + "step": 769 + }, + { + "loss": 0.0623, + "grad_norm": 1.0703315734863281, + "learning_rate": 1.2330000000000002e-05, + "num_tokens": 525776.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.77, + "step": 770 + }, + { + "loss": 0.0545, + "grad_norm": 0.8702475428581238, + "learning_rate": 1.232e-05, + "num_tokens": 526379.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.771, + "step": 771 + }, + { + "loss": 0.0629, + "grad_norm": 0.907402753829956, + "learning_rate": 1.2310000000000002e-05, + "num_tokens": 527403.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.772, + "step": 772 + }, + { + "loss": 0.1845, + "grad_norm": 2.788726568222046, + "learning_rate": 1.23e-05, + "num_tokens": 528427.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.773, + "step": 773 + }, + { + "loss": 0.054, + "grad_norm": 0.9503142833709717, + "learning_rate": 1.2290000000000003e-05, + "num_tokens": 529030.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.774, + "step": 774 + }, + { + "loss": 0.1536, + "grad_norm": 2.5461437702178955, + "learning_rate": 1.2280000000000001e-05, + "num_tokens": 530054.0, + "mean_token_accuracy": 0.9520547986030579, + "epoch": 0.775, + "step": 775 + }, + { + "loss": 0.0416, + "grad_norm": 1.0022748708724976, + "learning_rate": 1.2270000000000001e-05, + "num_tokens": 530657.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.776, + "step": 776 + }, + { + "loss": 0.0325, + "grad_norm": 0.7322590947151184, + "learning_rate": 1.2260000000000001e-05, + "num_tokens": 531260.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.777, + "step": 777 + }, + { + "loss": 0.0605, + "grad_norm": 1.0229724645614624, + "learning_rate": 1.2250000000000001e-05, + "num_tokens": 531863.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.778, + "step": 778 + }, + { + "loss": 0.0553, + "grad_norm": 1.0746158361434937, + "learning_rate": 1.2240000000000001e-05, + "num_tokens": 532466.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.779, + "step": 779 + }, + { + "loss": 0.055, + "grad_norm": 0.9289519190788269, + "learning_rate": 1.2230000000000001e-05, + "num_tokens": 533069.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.78, + "step": 780 + }, + { + "loss": 0.0543, + "grad_norm": 0.7544193267822266, + "learning_rate": 1.2220000000000002e-05, + "num_tokens": 534093.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.781, + "step": 781 + }, + { + "loss": 0.0644, + "grad_norm": 1.1872286796569824, + "learning_rate": 1.2210000000000002e-05, + "num_tokens": 534696.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.782, + "step": 782 + }, + { + "loss": 0.0588, + "grad_norm": 0.8853201866149902, + "learning_rate": 1.22e-05, + "num_tokens": 535299.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.783, + "step": 783 + }, + { + "loss": 0.0095, + "grad_norm": 1.7591997385025024, + "learning_rate": 1.2190000000000002e-05, + "num_tokens": 535481.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.784, + "step": 784 + }, + { + "loss": 0.0498, + "grad_norm": 0.6254715323448181, + "learning_rate": 1.218e-05, + "num_tokens": 536505.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.785, + "step": 785 + }, + { + "loss": 0.1833, + "grad_norm": 3.4329724311828613, + "learning_rate": 1.2170000000000002e-05, + "num_tokens": 537108.0, + "mean_token_accuracy": 0.940099835395813, + "epoch": 0.786, + "step": 786 + }, + { + "loss": 0.0805, + "grad_norm": 1.3052853345870972, + "learning_rate": 1.216e-05, + "num_tokens": 537711.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.787, + "step": 787 + }, + { + "loss": 0.048, + "grad_norm": 0.8230918645858765, + "learning_rate": 1.2150000000000002e-05, + "num_tokens": 538314.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.788, + "step": 788 + }, + { + "loss": 0.0531, + "grad_norm": 0.718222439289093, + "learning_rate": 1.214e-05, + "num_tokens": 539338.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.789, + "step": 789 + }, + { + "loss": 0.0067, + "grad_norm": 1.2014926671981812, + "learning_rate": 1.2130000000000002e-05, + "num_tokens": 539520.0, + "mean_token_accuracy": 1.0, + "epoch": 0.79, + "step": 790 + }, + { + "loss": 0.0524, + "grad_norm": 0.9611308574676514, + "learning_rate": 1.2120000000000001e-05, + "num_tokens": 540123.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.791, + "step": 791 + }, + { + "loss": 0.0459, + "grad_norm": 0.7757530212402344, + "learning_rate": 1.2110000000000001e-05, + "num_tokens": 540726.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.792, + "step": 792 + }, + { + "loss": 0.0063, + "grad_norm": 1.0544146299362183, + "learning_rate": 1.2100000000000001e-05, + "num_tokens": 540908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.793, + "step": 793 + }, + { + "loss": 0.0055, + "grad_norm": 0.8991574645042419, + "learning_rate": 1.2090000000000001e-05, + "num_tokens": 541090.0, + "mean_token_accuracy": 1.0, + "epoch": 0.794, + "step": 794 + }, + { + "loss": 0.0391, + "grad_norm": 0.7629162669181824, + "learning_rate": 1.2080000000000001e-05, + "num_tokens": 542114.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.795, + "step": 795 + }, + { + "loss": 0.0623, + "grad_norm": 0.9102928042411804, + "learning_rate": 1.2070000000000001e-05, + "num_tokens": 543138.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.796, + "step": 796 + }, + { + "loss": 0.0033, + "grad_norm": 0.3725976347923279, + "learning_rate": 1.2060000000000001e-05, + "num_tokens": 543320.0, + "mean_token_accuracy": 1.0, + "epoch": 0.797, + "step": 797 + }, + { + "loss": 0.0709, + "grad_norm": 0.9508499503135681, + "learning_rate": 1.2050000000000002e-05, + "num_tokens": 544344.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 0.798, + "step": 798 + }, + { + "loss": 0.0704, + "grad_norm": 1.1272201538085938, + "learning_rate": 1.204e-05, + "num_tokens": 545368.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.799, + "step": 799 + }, + { + "loss": 0.0512, + "grad_norm": 1.284423589706421, + "learning_rate": 1.2030000000000002e-05, + "num_tokens": 546392.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.8, + "step": 800 + }, + { + "loss": 0.0606, + "grad_norm": 1.0930120944976807, + "learning_rate": 1.202e-05, + "num_tokens": 546995.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.801, + "step": 801 + }, + { + "loss": 0.2028, + "grad_norm": 2.9636154174804688, + "learning_rate": 1.2010000000000002e-05, + "num_tokens": 547598.0, + "mean_token_accuracy": 0.9434276223182678, + "epoch": 0.802, + "step": 802 + }, + { + "loss": 0.0551, + "grad_norm": 0.9880566596984863, + "learning_rate": 1.2e-05, + "num_tokens": 548201.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.803, + "step": 803 + }, + { + "loss": 0.0741, + "grad_norm": 1.0149595737457275, + "learning_rate": 1.1990000000000002e-05, + "num_tokens": 549225.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.804, + "step": 804 + }, + { + "loss": 0.0558, + "grad_norm": 0.7165041565895081, + "learning_rate": 1.198e-05, + "num_tokens": 550249.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.805, + "step": 805 + }, + { + "loss": 0.1578, + "grad_norm": 2.9387247562408447, + "learning_rate": 1.1970000000000002e-05, + "num_tokens": 550852.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.806, + "step": 806 + }, + { + "loss": 0.0072, + "grad_norm": 1.3342481851577759, + "learning_rate": 1.196e-05, + "num_tokens": 551034.0, + "mean_token_accuracy": 1.0, + "epoch": 0.807, + "step": 807 + }, + { + "loss": 0.0518, + "grad_norm": 0.9258549213409424, + "learning_rate": 1.195e-05, + "num_tokens": 551637.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.808, + "step": 808 + }, + { + "loss": 0.0535, + "grad_norm": 0.812700092792511, + "learning_rate": 1.1940000000000001e-05, + "num_tokens": 552240.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.809, + "step": 809 + }, + { + "loss": 0.0595, + "grad_norm": 1.1722562313079834, + "learning_rate": 1.1930000000000001e-05, + "num_tokens": 552843.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.81, + "step": 810 + }, + { + "loss": 0.0521, + "grad_norm": 0.7275489568710327, + "learning_rate": 1.1920000000000001e-05, + "num_tokens": 553867.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.811, + "step": 811 + }, + { + "loss": 0.01, + "grad_norm": 1.7290879487991333, + "learning_rate": 1.1910000000000001e-05, + "num_tokens": 554049.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.812, + "step": 812 + }, + { + "loss": 0.0679, + "grad_norm": 0.8877097368240356, + "learning_rate": 1.1900000000000001e-05, + "num_tokens": 555073.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.813, + "step": 813 + }, + { + "loss": 0.0096, + "grad_norm": 1.703001618385315, + "learning_rate": 1.1890000000000001e-05, + "num_tokens": 555255.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.814, + "step": 814 + }, + { + "loss": 0.0084, + "grad_norm": 1.508344292640686, + "learning_rate": 1.188e-05, + "num_tokens": 555437.0, + "mean_token_accuracy": 1.0, + "epoch": 0.815, + "step": 815 + }, + { + "loss": 0.0544, + "grad_norm": 0.9113777279853821, + "learning_rate": 1.1870000000000002e-05, + "num_tokens": 556040.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.816, + "step": 816 + }, + { + "loss": 0.0704, + "grad_norm": 1.184165358543396, + "learning_rate": 1.186e-05, + "num_tokens": 556643.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.817, + "step": 817 + }, + { + "loss": 0.0478, + "grad_norm": 0.9185481667518616, + "learning_rate": 1.1850000000000002e-05, + "num_tokens": 557246.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.818, + "step": 818 + }, + { + "loss": 0.0398, + "grad_norm": 0.9394212365150452, + "learning_rate": 1.184e-05, + "num_tokens": 557849.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.819, + "step": 819 + }, + { + "loss": 0.0529, + "grad_norm": 0.9966578483581543, + "learning_rate": 1.1830000000000002e-05, + "num_tokens": 558873.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.82, + "step": 820 + }, + { + "loss": 0.0553, + "grad_norm": 0.995188295841217, + "learning_rate": 1.182e-05, + "num_tokens": 559897.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.821, + "step": 821 + }, + { + "loss": 0.0605, + "grad_norm": 1.2694830894470215, + "learning_rate": 1.1810000000000002e-05, + "num_tokens": 560921.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.822, + "step": 822 + }, + { + "loss": 0.0582, + "grad_norm": 0.8434872627258301, + "learning_rate": 1.18e-05, + "num_tokens": 561945.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.823, + "step": 823 + }, + { + "loss": 0.0457, + "grad_norm": 0.8467468023300171, + "learning_rate": 1.179e-05, + "num_tokens": 562548.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.824, + "step": 824 + }, + { + "loss": 0.0063, + "grad_norm": 1.0665810108184814, + "learning_rate": 1.178e-05, + "num_tokens": 562730.0, + "mean_token_accuracy": 1.0, + "epoch": 0.825, + "step": 825 + }, + { + "loss": 0.0566, + "grad_norm": 0.9971085786819458, + "learning_rate": 1.177e-05, + "num_tokens": 563333.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.826, + "step": 826 + }, + { + "loss": 0.0492, + "grad_norm": 0.831574559211731, + "learning_rate": 1.1760000000000001e-05, + "num_tokens": 564357.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.827, + "step": 827 + }, + { + "loss": 0.0534, + "grad_norm": 1.0245475769042969, + "learning_rate": 1.1750000000000001e-05, + "num_tokens": 565381.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.828, + "step": 828 + }, + { + "loss": 0.0541, + "grad_norm": 0.9119972586631775, + "learning_rate": 1.1740000000000001e-05, + "num_tokens": 565984.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.829, + "step": 829 + }, + { + "loss": 0.0082, + "grad_norm": 1.4160255193710327, + "learning_rate": 1.1730000000000001e-05, + "num_tokens": 566166.0, + "mean_token_accuracy": 1.0, + "epoch": 0.83, + "step": 830 + }, + { + "loss": 0.0346, + "grad_norm": 0.6937861442565918, + "learning_rate": 1.172e-05, + "num_tokens": 566769.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.831, + "step": 831 + }, + { + "loss": 0.0526, + "grad_norm": 0.8763881921768188, + "learning_rate": 1.1710000000000001e-05, + "num_tokens": 567793.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.832, + "step": 832 + }, + { + "loss": 0.0552, + "grad_norm": 0.975339949131012, + "learning_rate": 1.17e-05, + "num_tokens": 568396.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.833, + "step": 833 + }, + { + "loss": 0.0555, + "grad_norm": 0.7523898482322693, + "learning_rate": 1.1690000000000002e-05, + "num_tokens": 568999.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.834, + "step": 834 + }, + { + "loss": 0.0549, + "grad_norm": 0.8790054321289062, + "learning_rate": 1.168e-05, + "num_tokens": 570023.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.835, + "step": 835 + }, + { + "loss": 0.0624, + "grad_norm": 1.2932872772216797, + "learning_rate": 1.1670000000000002e-05, + "num_tokens": 570626.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.836, + "step": 836 + }, + { + "loss": 0.0472, + "grad_norm": 0.7312279343605042, + "learning_rate": 1.166e-05, + "num_tokens": 571650.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 0.837, + "step": 837 + }, + { + "loss": 0.0392, + "grad_norm": 0.7702077627182007, + "learning_rate": 1.1650000000000002e-05, + "num_tokens": 572674.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 0.838, + "step": 838 + }, + { + "loss": 0.0126, + "grad_norm": 1.9679837226867676, + "learning_rate": 1.164e-05, + "num_tokens": 572856.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.839, + "step": 839 + }, + { + "loss": 0.0523, + "grad_norm": 0.7391607165336609, + "learning_rate": 1.163e-05, + "num_tokens": 573880.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.84, + "step": 840 + }, + { + "loss": 0.0423, + "grad_norm": 0.6933834552764893, + "learning_rate": 1.162e-05, + "num_tokens": 574904.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.841, + "step": 841 + }, + { + "loss": 0.011, + "grad_norm": 1.7495671510696411, + "learning_rate": 1.161e-05, + "num_tokens": 575086.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.842, + "step": 842 + }, + { + "loss": 0.0661, + "grad_norm": 0.9738606810569763, + "learning_rate": 1.16e-05, + "num_tokens": 576110.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.843, + "step": 843 + }, + { + "loss": 0.054, + "grad_norm": 1.1215018033981323, + "learning_rate": 1.159e-05, + "num_tokens": 576713.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.844, + "step": 844 + }, + { + "loss": 0.0397, + "grad_norm": 0.7533130645751953, + "learning_rate": 1.1580000000000001e-05, + "num_tokens": 577737.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 0.845, + "step": 845 + }, + { + "loss": 0.0099, + "grad_norm": 1.6206952333450317, + "learning_rate": 1.1570000000000001e-05, + "num_tokens": 577919.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.846, + "step": 846 + }, + { + "loss": 0.0482, + "grad_norm": 0.8448578119277954, + "learning_rate": 1.156e-05, + "num_tokens": 578522.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.847, + "step": 847 + }, + { + "loss": 0.0497, + "grad_norm": 0.9532232284545898, + "learning_rate": 1.1550000000000001e-05, + "num_tokens": 579125.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.848, + "step": 848 + }, + { + "loss": 0.0611, + "grad_norm": 1.0645647048950195, + "learning_rate": 1.154e-05, + "num_tokens": 579728.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.849, + "step": 849 + }, + { + "loss": 0.0487, + "grad_norm": 0.9649556875228882, + "learning_rate": 1.1530000000000001e-05, + "num_tokens": 580752.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.85, + "step": 850 + }, + { + "loss": 0.0355, + "grad_norm": 1.1456025838851929, + "learning_rate": 1.152e-05, + "num_tokens": 581355.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.851, + "step": 851 + }, + { + "loss": 0.0403, + "grad_norm": 0.9182752370834351, + "learning_rate": 1.1510000000000002e-05, + "num_tokens": 581958.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.852, + "step": 852 + }, + { + "loss": 0.0639, + "grad_norm": 1.5189045667648315, + "learning_rate": 1.15e-05, + "num_tokens": 582561.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.853, + "step": 853 + }, + { + "loss": 0.0485, + "grad_norm": 1.0986984968185425, + "learning_rate": 1.1490000000000002e-05, + "num_tokens": 583164.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.854, + "step": 854 + }, + { + "loss": 0.0487, + "grad_norm": 0.8655186891555786, + "learning_rate": 1.148e-05, + "num_tokens": 584188.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.855, + "step": 855 + }, + { + "loss": 0.056, + "grad_norm": 0.998289167881012, + "learning_rate": 1.147e-05, + "num_tokens": 585212.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.856, + "step": 856 + }, + { + "loss": 0.0077, + "grad_norm": 1.1870158910751343, + "learning_rate": 1.146e-05, + "num_tokens": 585394.0, + "mean_token_accuracy": 1.0, + "epoch": 0.857, + "step": 857 + }, + { + "loss": 0.0671, + "grad_norm": 1.062109112739563, + "learning_rate": 1.145e-05, + "num_tokens": 586418.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.858, + "step": 858 + }, + { + "loss": 0.0604, + "grad_norm": 0.7632076144218445, + "learning_rate": 1.144e-05, + "num_tokens": 587442.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.859, + "step": 859 + }, + { + "loss": 0.0504, + "grad_norm": 1.0189100503921509, + "learning_rate": 1.143e-05, + "num_tokens": 588466.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.86, + "step": 860 + }, + { + "loss": 0.0729, + "grad_norm": 1.0248647928237915, + "learning_rate": 1.142e-05, + "num_tokens": 589490.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 0.861, + "step": 861 + }, + { + "loss": 0.0772, + "grad_norm": 1.485296607017517, + "learning_rate": 1.1410000000000001e-05, + "num_tokens": 590093.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 0.862, + "step": 862 + }, + { + "loss": 0.0457, + "grad_norm": 1.0928043127059937, + "learning_rate": 1.14e-05, + "num_tokens": 590696.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.863, + "step": 863 + }, + { + "loss": 0.0394, + "grad_norm": 0.8996139168739319, + "learning_rate": 1.1390000000000001e-05, + "num_tokens": 591299.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.864, + "step": 864 + }, + { + "loss": 0.0646, + "grad_norm": 0.981772243976593, + "learning_rate": 1.138e-05, + "num_tokens": 592323.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.865, + "step": 865 + }, + { + "loss": 0.0514, + "grad_norm": 1.0952850580215454, + "learning_rate": 1.1370000000000001e-05, + "num_tokens": 592926.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.866, + "step": 866 + }, + { + "loss": 0.042, + "grad_norm": 0.9182447195053101, + "learning_rate": 1.136e-05, + "num_tokens": 593529.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.867, + "step": 867 + }, + { + "loss": 0.0137, + "grad_norm": 1.8901221752166748, + "learning_rate": 1.1350000000000001e-05, + "num_tokens": 593711.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.868, + "step": 868 + }, + { + "loss": 0.0352, + "grad_norm": 0.8652055263519287, + "learning_rate": 1.134e-05, + "num_tokens": 594314.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.869, + "step": 869 + }, + { + "loss": 0.0113, + "grad_norm": 1.687259316444397, + "learning_rate": 1.1330000000000002e-05, + "num_tokens": 594496.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.87, + "step": 870 + }, + { + "loss": 0.0698, + "grad_norm": 0.8221616744995117, + "learning_rate": 1.132e-05, + "num_tokens": 595520.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.871, + "step": 871 + }, + { + "loss": 0.0662, + "grad_norm": 1.1668425798416138, + "learning_rate": 1.131e-05, + "num_tokens": 596544.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.872, + "step": 872 + }, + { + "loss": 0.0086, + "grad_norm": 1.3820511102676392, + "learning_rate": 1.13e-05, + "num_tokens": 596726.0, + "mean_token_accuracy": 1.0, + "epoch": 0.873, + "step": 873 + }, + { + "loss": 0.0069, + "grad_norm": 1.1286393404006958, + "learning_rate": 1.129e-05, + "num_tokens": 596908.0, + "mean_token_accuracy": 1.0, + "epoch": 0.874, + "step": 874 + }, + { + "loss": 0.0482, + "grad_norm": 0.7835375666618347, + "learning_rate": 1.128e-05, + "num_tokens": 597511.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.875, + "step": 875 + }, + { + "loss": 0.0627, + "grad_norm": 0.9090060591697693, + "learning_rate": 1.127e-05, + "num_tokens": 598535.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 0.876, + "step": 876 + }, + { + "loss": 0.0503, + "grad_norm": 0.902717113494873, + "learning_rate": 1.126e-05, + "num_tokens": 599559.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.877, + "step": 877 + }, + { + "loss": 0.0491, + "grad_norm": 1.2322841882705688, + "learning_rate": 1.125e-05, + "num_tokens": 600162.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.878, + "step": 878 + }, + { + "loss": 0.0652, + "grad_norm": 1.2013965845108032, + "learning_rate": 1.1240000000000002e-05, + "num_tokens": 600765.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.879, + "step": 879 + }, + { + "loss": 0.054, + "grad_norm": 1.0098602771759033, + "learning_rate": 1.1230000000000001e-05, + "num_tokens": 601368.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.88, + "step": 880 + }, + { + "loss": 0.0534, + "grad_norm": 1.5369949340820312, + "learning_rate": 1.1220000000000003e-05, + "num_tokens": 601971.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.881, + "step": 881 + }, + { + "loss": 0.0445, + "grad_norm": 0.7995336055755615, + "learning_rate": 1.1210000000000001e-05, + "num_tokens": 602995.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.882, + "step": 882 + }, + { + "loss": 0.0477, + "grad_norm": 0.907474160194397, + "learning_rate": 1.1200000000000001e-05, + "num_tokens": 603598.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.883, + "step": 883 + }, + { + "loss": 0.0651, + "grad_norm": 1.6879723072052002, + "learning_rate": 1.1190000000000001e-05, + "num_tokens": 604201.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.884, + "step": 884 + }, + { + "loss": 0.0553, + "grad_norm": 0.8439010381698608, + "learning_rate": 1.1180000000000001e-05, + "num_tokens": 605225.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.885, + "step": 885 + }, + { + "loss": 0.0498, + "grad_norm": 0.8361995220184326, + "learning_rate": 1.1170000000000001e-05, + "num_tokens": 605828.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.886, + "step": 886 + }, + { + "loss": 0.0308, + "grad_norm": 0.7240535020828247, + "learning_rate": 1.1160000000000002e-05, + "num_tokens": 606431.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.887, + "step": 887 + }, + { + "loss": 0.0078, + "grad_norm": 1.3500488996505737, + "learning_rate": 1.1150000000000002e-05, + "num_tokens": 606613.0, + "mean_token_accuracy": 1.0, + "epoch": 0.888, + "step": 888 + }, + { + "loss": 0.0609, + "grad_norm": 1.5635021924972534, + "learning_rate": 1.1140000000000002e-05, + "num_tokens": 607216.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.889, + "step": 889 + }, + { + "loss": 0.0539, + "grad_norm": 0.8278137445449829, + "learning_rate": 1.113e-05, + "num_tokens": 608240.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.89, + "step": 890 + }, + { + "loss": 0.0355, + "grad_norm": 0.7066246867179871, + "learning_rate": 1.1120000000000002e-05, + "num_tokens": 608843.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.891, + "step": 891 + }, + { + "loss": 0.0091, + "grad_norm": 1.524722933769226, + "learning_rate": 1.111e-05, + "num_tokens": 609025.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 0.892, + "step": 892 + }, + { + "loss": 0.0624, + "grad_norm": 1.1601239442825317, + "learning_rate": 1.1100000000000002e-05, + "num_tokens": 609628.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.893, + "step": 893 + }, + { + "loss": 0.0537, + "grad_norm": 0.9016846418380737, + "learning_rate": 1.109e-05, + "num_tokens": 610231.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.894, + "step": 894 + }, + { + "loss": 0.054, + "grad_norm": 0.905412495136261, + "learning_rate": 1.1080000000000002e-05, + "num_tokens": 610834.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.895, + "step": 895 + }, + { + "loss": 0.0607, + "grad_norm": 0.9579037427902222, + "learning_rate": 1.107e-05, + "num_tokens": 611437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.896, + "step": 896 + }, + { + "loss": 0.0553, + "grad_norm": 0.9763801693916321, + "learning_rate": 1.1060000000000003e-05, + "num_tokens": 612040.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.897, + "step": 897 + }, + { + "loss": 0.0478, + "grad_norm": 0.8512241244316101, + "learning_rate": 1.1050000000000001e-05, + "num_tokens": 612643.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.898, + "step": 898 + }, + { + "loss": 0.0072, + "grad_norm": 1.1735706329345703, + "learning_rate": 1.1040000000000001e-05, + "num_tokens": 612825.0, + "mean_token_accuracy": 1.0, + "epoch": 0.899, + "step": 899 + }, + { + "loss": 0.046, + "grad_norm": 0.696629524230957, + "learning_rate": 1.1030000000000001e-05, + "num_tokens": 613849.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.9, + "step": 900 + }, + { + "loss": 0.0513, + "grad_norm": 0.9666752219200134, + "learning_rate": 1.1020000000000001e-05, + "num_tokens": 614452.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.901, + "step": 901 + }, + { + "loss": 0.0534, + "grad_norm": 1.0399560928344727, + "learning_rate": 1.1010000000000001e-05, + "num_tokens": 615055.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.902, + "step": 902 + }, + { + "loss": 0.0516, + "grad_norm": 0.8517758250236511, + "learning_rate": 1.1000000000000001e-05, + "num_tokens": 616079.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.903, + "step": 903 + }, + { + "loss": 0.0519, + "grad_norm": 0.992303729057312, + "learning_rate": 1.0990000000000002e-05, + "num_tokens": 616682.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.904, + "step": 904 + }, + { + "loss": 0.0363, + "grad_norm": 0.900538444519043, + "learning_rate": 1.0980000000000002e-05, + "num_tokens": 617285.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.905, + "step": 905 + }, + { + "loss": 0.0059, + "grad_norm": 0.9594456553459167, + "learning_rate": 1.097e-05, + "num_tokens": 617467.0, + "mean_token_accuracy": 1.0, + "epoch": 0.906, + "step": 906 + }, + { + "loss": 0.0513, + "grad_norm": 0.7595255970954895, + "learning_rate": 1.0960000000000002e-05, + "num_tokens": 618491.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.907, + "step": 907 + }, + { + "loss": 0.0553, + "grad_norm": 1.0218267440795898, + "learning_rate": 1.095e-05, + "num_tokens": 619515.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.908, + "step": 908 + }, + { + "loss": 0.0795, + "grad_norm": 2.5160579681396484, + "learning_rate": 1.0940000000000002e-05, + "num_tokens": 620118.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.909, + "step": 909 + }, + { + "loss": 0.0442, + "grad_norm": 0.8641685247421265, + "learning_rate": 1.093e-05, + "num_tokens": 620721.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.91, + "step": 910 + }, + { + "loss": 0.0812, + "grad_norm": 2.464181661605835, + "learning_rate": 1.0920000000000002e-05, + "num_tokens": 621324.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 0.911, + "step": 911 + }, + { + "loss": 0.0555, + "grad_norm": 1.158937931060791, + "learning_rate": 1.091e-05, + "num_tokens": 621927.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.912, + "step": 912 + }, + { + "loss": 0.0063, + "grad_norm": 1.0397167205810547, + "learning_rate": 1.0900000000000002e-05, + "num_tokens": 622109.0, + "mean_token_accuracy": 1.0, + "epoch": 0.913, + "step": 913 + }, + { + "loss": 0.036, + "grad_norm": 0.9005758166313171, + "learning_rate": 1.089e-05, + "num_tokens": 622712.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.914, + "step": 914 + }, + { + "loss": 0.0407, + "grad_norm": 0.800634503364563, + "learning_rate": 1.0880000000000001e-05, + "num_tokens": 623736.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.915, + "step": 915 + }, + { + "loss": 0.0063, + "grad_norm": 1.1051758527755737, + "learning_rate": 1.0870000000000001e-05, + "num_tokens": 623918.0, + "mean_token_accuracy": 1.0, + "epoch": 0.916, + "step": 916 + }, + { + "loss": 0.0493, + "grad_norm": 1.1623152494430542, + "learning_rate": 1.0860000000000001e-05, + "num_tokens": 624521.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.917, + "step": 917 + }, + { + "loss": 0.0052, + "grad_norm": 0.9127672910690308, + "learning_rate": 1.0850000000000001e-05, + "num_tokens": 624703.0, + "mean_token_accuracy": 1.0, + "epoch": 0.918, + "step": 918 + }, + { + "loss": 0.0441, + "grad_norm": 1.1386882066726685, + "learning_rate": 1.0840000000000001e-05, + "num_tokens": 625306.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.919, + "step": 919 + }, + { + "loss": 0.0521, + "grad_norm": 0.9355550408363342, + "learning_rate": 1.0830000000000001e-05, + "num_tokens": 625909.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.92, + "step": 920 + }, + { + "loss": 0.0565, + "grad_norm": 0.9229368567466736, + "learning_rate": 1.0820000000000001e-05, + "num_tokens": 626512.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.921, + "step": 921 + }, + { + "loss": 0.0336, + "grad_norm": 0.991707444190979, + "learning_rate": 1.081e-05, + "num_tokens": 627115.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.922, + "step": 922 + }, + { + "loss": 0.0531, + "grad_norm": 1.174130916595459, + "learning_rate": 1.0800000000000002e-05, + "num_tokens": 628139.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.923, + "step": 923 + }, + { + "loss": 0.0038, + "grad_norm": 0.6629912257194519, + "learning_rate": 1.079e-05, + "num_tokens": 628321.0, + "mean_token_accuracy": 1.0, + "epoch": 0.924, + "step": 924 + }, + { + "loss": 0.0546, + "grad_norm": 1.1083015203475952, + "learning_rate": 1.0780000000000002e-05, + "num_tokens": 628924.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.925, + "step": 925 + }, + { + "loss": 0.0631, + "grad_norm": 0.8983903527259827, + "learning_rate": 1.077e-05, + "num_tokens": 629948.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.926, + "step": 926 + }, + { + "loss": 0.0549, + "grad_norm": 1.1400083303451538, + "learning_rate": 1.0760000000000002e-05, + "num_tokens": 630551.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 0.927, + "step": 927 + }, + { + "loss": 0.0508, + "grad_norm": 1.156061053276062, + "learning_rate": 1.075e-05, + "num_tokens": 631575.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.928, + "step": 928 + }, + { + "loss": 0.0489, + "grad_norm": 1.3074612617492676, + "learning_rate": 1.0740000000000002e-05, + "num_tokens": 632178.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.929, + "step": 929 + }, + { + "loss": 0.0055, + "grad_norm": 1.0049898624420166, + "learning_rate": 1.073e-05, + "num_tokens": 632360.0, + "mean_token_accuracy": 1.0, + "epoch": 0.93, + "step": 930 + }, + { + "loss": 0.0341, + "grad_norm": 0.7812163829803467, + "learning_rate": 1.072e-05, + "num_tokens": 632963.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.931, + "step": 931 + }, + { + "loss": 0.0517, + "grad_norm": 0.9627772569656372, + "learning_rate": 1.071e-05, + "num_tokens": 633566.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.932, + "step": 932 + }, + { + "loss": 0.0331, + "grad_norm": 0.7385684251785278, + "learning_rate": 1.0700000000000001e-05, + "num_tokens": 634169.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.933, + "step": 933 + }, + { + "loss": 0.0478, + "grad_norm": 0.8066194653511047, + "learning_rate": 1.0690000000000001e-05, + "num_tokens": 634772.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.934, + "step": 934 + }, + { + "loss": 0.0321, + "grad_norm": 0.7036237120628357, + "learning_rate": 1.0680000000000001e-05, + "num_tokens": 635375.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.935, + "step": 935 + }, + { + "loss": 0.0345, + "grad_norm": 0.716787576675415, + "learning_rate": 1.0670000000000001e-05, + "num_tokens": 635978.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.936, + "step": 936 + }, + { + "loss": 0.0567, + "grad_norm": 0.7176898717880249, + "learning_rate": 1.0660000000000001e-05, + "num_tokens": 637002.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.937, + "step": 937 + }, + { + "loss": 0.0513, + "grad_norm": 0.7790811657905579, + "learning_rate": 1.065e-05, + "num_tokens": 638026.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.938, + "step": 938 + }, + { + "loss": 0.0338, + "grad_norm": 0.6591680645942688, + "learning_rate": 1.0640000000000001e-05, + "num_tokens": 638629.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 0.939, + "step": 939 + }, + { + "loss": 0.0549, + "grad_norm": 0.9362866878509521, + "learning_rate": 1.063e-05, + "num_tokens": 639653.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.94, + "step": 940 + }, + { + "loss": 0.011, + "grad_norm": 1.7603825330734253, + "learning_rate": 1.0620000000000002e-05, + "num_tokens": 639835.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.941, + "step": 941 + }, + { + "loss": 0.048, + "grad_norm": 0.73158860206604, + "learning_rate": 1.061e-05, + "num_tokens": 640859.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 0.942, + "step": 942 + }, + { + "loss": 0.0558, + "grad_norm": 1.1625018119812012, + "learning_rate": 1.0600000000000002e-05, + "num_tokens": 641462.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.943, + "step": 943 + }, + { + "loss": 0.0479, + "grad_norm": 0.6336035132408142, + "learning_rate": 1.059e-05, + "num_tokens": 642486.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.944, + "step": 944 + }, + { + "loss": 0.0787, + "grad_norm": 1.3355145454406738, + "learning_rate": 1.0580000000000002e-05, + "num_tokens": 643510.0, + "mean_token_accuracy": 0.9618395566940308, + "epoch": 0.945, + "step": 945 + }, + { + "loss": 0.0557, + "grad_norm": 0.9856793880462646, + "learning_rate": 1.057e-05, + "num_tokens": 644534.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.946, + "step": 946 + }, + { + "loss": 0.0543, + "grad_norm": 0.7999506592750549, + "learning_rate": 1.056e-05, + "num_tokens": 645558.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.947, + "step": 947 + }, + { + "loss": 0.0574, + "grad_norm": 1.2324020862579346, + "learning_rate": 1.055e-05, + "num_tokens": 646582.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.948, + "step": 948 + }, + { + "loss": 0.0597, + "grad_norm": 0.7820236682891846, + "learning_rate": 1.054e-05, + "num_tokens": 647606.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.949, + "step": 949 + }, + { + "loss": 0.0457, + "grad_norm": 0.8172613978385925, + "learning_rate": 1.053e-05, + "num_tokens": 648630.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.95, + "step": 950 + }, + { + "loss": 0.0594, + "grad_norm": 0.7998207807540894, + "learning_rate": 1.0520000000000001e-05, + "num_tokens": 649654.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.951, + "step": 951 + }, + { + "loss": 0.0392, + "grad_norm": 0.9326035380363464, + "learning_rate": 1.0510000000000001e-05, + "num_tokens": 650257.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.952, + "step": 952 + }, + { + "loss": 0.0512, + "grad_norm": 0.7850275635719299, + "learning_rate": 1.0500000000000001e-05, + "num_tokens": 651281.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.953, + "step": 953 + }, + { + "loss": 0.0176, + "grad_norm": 2.2797505855560303, + "learning_rate": 1.049e-05, + "num_tokens": 651463.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.954, + "step": 954 + }, + { + "loss": 0.0611, + "grad_norm": 1.1397391557693481, + "learning_rate": 1.0480000000000001e-05, + "num_tokens": 652487.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.955, + "step": 955 + }, + { + "loss": 0.0452, + "grad_norm": 0.7332718372344971, + "learning_rate": 1.047e-05, + "num_tokens": 653511.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.956, + "step": 956 + }, + { + "loss": 0.1722, + "grad_norm": 3.8387889862060547, + "learning_rate": 1.0460000000000001e-05, + "num_tokens": 654114.0, + "mean_token_accuracy": 0.9517470598220825, + "epoch": 0.957, + "step": 957 + }, + { + "loss": 0.0559, + "grad_norm": 0.9827572703361511, + "learning_rate": 1.045e-05, + "num_tokens": 655138.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.958, + "step": 958 + }, + { + "loss": 0.0698, + "grad_norm": 2.284926414489746, + "learning_rate": 1.0440000000000002e-05, + "num_tokens": 655741.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.959, + "step": 959 + }, + { + "loss": 0.0544, + "grad_norm": 0.8642245531082153, + "learning_rate": 1.043e-05, + "num_tokens": 656765.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.96, + "step": 960 + }, + { + "loss": 0.057, + "grad_norm": 0.9453803300857544, + "learning_rate": 1.0420000000000002e-05, + "num_tokens": 657789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.961, + "step": 961 + }, + { + "loss": 0.05, + "grad_norm": 0.7844247221946716, + "learning_rate": 1.041e-05, + "num_tokens": 658813.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.962, + "step": 962 + }, + { + "loss": 0.1372, + "grad_norm": 3.7035183906555176, + "learning_rate": 1.04e-05, + "num_tokens": 659416.0, + "mean_token_accuracy": 0.9584026336669922, + "epoch": 0.963, + "step": 963 + }, + { + "loss": 0.0488, + "grad_norm": 0.9842399954795837, + "learning_rate": 1.039e-05, + "num_tokens": 660440.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 0.964, + "step": 964 + }, + { + "loss": 0.0537, + "grad_norm": 1.0709846019744873, + "learning_rate": 1.038e-05, + "num_tokens": 661464.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.965, + "step": 965 + }, + { + "loss": 0.0564, + "grad_norm": 0.7966786026954651, + "learning_rate": 1.037e-05, + "num_tokens": 662488.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 0.966, + "step": 966 + }, + { + "loss": 0.0537, + "grad_norm": 0.8567167520523071, + "learning_rate": 1.036e-05, + "num_tokens": 663091.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.967, + "step": 967 + }, + { + "loss": 0.0517, + "grad_norm": 2.8711585998535156, + "learning_rate": 1.0350000000000001e-05, + "num_tokens": 663694.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.968, + "step": 968 + }, + { + "loss": 0.0424, + "grad_norm": 0.7927305102348328, + "learning_rate": 1.0340000000000001e-05, + "num_tokens": 664718.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 0.969, + "step": 969 + }, + { + "loss": 0.0328, + "grad_norm": 0.7149138450622559, + "learning_rate": 1.033e-05, + "num_tokens": 665321.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.97, + "step": 970 + }, + { + "loss": 0.0453, + "grad_norm": 0.9201661944389343, + "learning_rate": 1.0320000000000001e-05, + "num_tokens": 666345.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 0.971, + "step": 971 + }, + { + "loss": 0.0583, + "grad_norm": 0.7454182505607605, + "learning_rate": 1.031e-05, + "num_tokens": 667369.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.972, + "step": 972 + }, + { + "loss": 0.0386, + "grad_norm": 0.864448070526123, + "learning_rate": 1.0300000000000001e-05, + "num_tokens": 667972.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.973, + "step": 973 + }, + { + "loss": 0.0524, + "grad_norm": 0.653964102268219, + "learning_rate": 1.029e-05, + "num_tokens": 668996.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 0.974, + "step": 974 + }, + { + "loss": 0.062, + "grad_norm": 0.8780527114868164, + "learning_rate": 1.0280000000000002e-05, + "num_tokens": 670020.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 0.975, + "step": 975 + }, + { + "loss": 0.0363, + "grad_norm": 0.855196475982666, + "learning_rate": 1.027e-05, + "num_tokens": 670623.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.976, + "step": 976 + }, + { + "loss": 0.0189, + "grad_norm": 2.3670332431793213, + "learning_rate": 1.0260000000000002e-05, + "num_tokens": 670805.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.977, + "step": 977 + }, + { + "loss": 0.0635, + "grad_norm": 1.3440663814544678, + "learning_rate": 1.025e-05, + "num_tokens": 671408.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 0.978, + "step": 978 + }, + { + "loss": 0.0481, + "grad_norm": 0.8412259221076965, + "learning_rate": 1.024e-05, + "num_tokens": 672011.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.979, + "step": 979 + }, + { + "loss": 0.0589, + "grad_norm": 0.7858722805976868, + "learning_rate": 1.023e-05, + "num_tokens": 673035.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 0.98, + "step": 980 + }, + { + "loss": 0.0519, + "grad_norm": 0.7315422892570496, + "learning_rate": 1.022e-05, + "num_tokens": 674059.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.981, + "step": 981 + }, + { + "loss": 0.0594, + "grad_norm": 1.3124761581420898, + "learning_rate": 1.021e-05, + "num_tokens": 674662.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 0.982, + "step": 982 + }, + { + "loss": 0.0115, + "grad_norm": 1.7334574460983276, + "learning_rate": 1.02e-05, + "num_tokens": 674844.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 0.983, + "step": 983 + }, + { + "loss": 0.0559, + "grad_norm": 1.1707409620285034, + "learning_rate": 1.019e-05, + "num_tokens": 675447.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 0.984, + "step": 984 + }, + { + "loss": 0.0339, + "grad_norm": 0.7773995399475098, + "learning_rate": 1.018e-05, + "num_tokens": 676050.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.985, + "step": 985 + }, + { + "loss": 0.0557, + "grad_norm": 0.897598385810852, + "learning_rate": 1.017e-05, + "num_tokens": 677074.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 0.986, + "step": 986 + }, + { + "loss": 0.0578, + "grad_norm": 0.9828428626060486, + "learning_rate": 1.0160000000000001e-05, + "num_tokens": 678098.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.987, + "step": 987 + }, + { + "loss": 0.0478, + "grad_norm": 0.7874612808227539, + "learning_rate": 1.015e-05, + "num_tokens": 679122.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 0.988, + "step": 988 + }, + { + "loss": 0.0067, + "grad_norm": 1.0844510793685913, + "learning_rate": 1.0140000000000001e-05, + "num_tokens": 679304.0, + "mean_token_accuracy": 1.0, + "epoch": 0.989, + "step": 989 + }, + { + "loss": 0.0463, + "grad_norm": 0.9287775754928589, + "learning_rate": 1.013e-05, + "num_tokens": 679907.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 0.99, + "step": 990 + }, + { + "loss": 0.0542, + "grad_norm": 1.1648800373077393, + "learning_rate": 1.0120000000000001e-05, + "num_tokens": 680510.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 0.991, + "step": 991 + }, + { + "loss": 0.0594, + "grad_norm": 1.4217649698257446, + "learning_rate": 1.011e-05, + "num_tokens": 681534.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 0.992, + "step": 992 + }, + { + "loss": 0.0537, + "grad_norm": 1.002682089805603, + "learning_rate": 1.0100000000000002e-05, + "num_tokens": 682137.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 0.993, + "step": 993 + }, + { + "loss": 0.0303, + "grad_norm": 0.6803109645843506, + "learning_rate": 1.009e-05, + "num_tokens": 682740.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.994, + "step": 994 + }, + { + "loss": 0.048, + "grad_norm": 0.9071928858757019, + "learning_rate": 1.008e-05, + "num_tokens": 683764.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 0.995, + "step": 995 + }, + { + "loss": 0.0427, + "grad_norm": 0.9404779672622681, + "learning_rate": 1.007e-05, + "num_tokens": 684367.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 0.996, + "step": 996 + }, + { + "loss": 0.0597, + "grad_norm": 0.8706483840942383, + "learning_rate": 1.006e-05, + "num_tokens": 685391.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 0.997, + "step": 997 + }, + { + "loss": 0.0349, + "grad_norm": 0.7749162912368774, + "learning_rate": 1.005e-05, + "num_tokens": 685994.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 0.998, + "step": 998 + }, + { + "loss": 0.0368, + "grad_norm": 0.8396089673042297, + "learning_rate": 1.004e-05, + "num_tokens": 686597.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 0.999, + "step": 999 + }, + { + "loss": 0.0564, + "grad_norm": 1.237868070602417, + "learning_rate": 1.003e-05, + "num_tokens": 687200.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.0, + "step": 1000 + }, + { + "loss": 0.0474, + "grad_norm": 0.7974348664283752, + "learning_rate": 1.002e-05, + "num_tokens": 687803.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.001, + "step": 1001 + }, + { + "loss": 0.009, + "grad_norm": 1.6003921031951904, + "learning_rate": 1.0009999999999999e-05, + "num_tokens": 687985.0, + "mean_token_accuracy": 1.0, + "epoch": 1.002, + "step": 1002 + }, + { + "loss": 0.01, + "grad_norm": 1.6830997467041016, + "learning_rate": 1e-05, + "num_tokens": 688167.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.003, + "step": 1003 + }, + { + "loss": 0.0509, + "grad_norm": 0.905796468257904, + "learning_rate": 9.990000000000001e-06, + "num_tokens": 688770.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.004, + "step": 1004 + }, + { + "loss": 0.0388, + "grad_norm": 0.7253294587135315, + "learning_rate": 9.980000000000001e-06, + "num_tokens": 689794.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.005, + "step": 1005 + }, + { + "loss": 0.0531, + "grad_norm": 0.8021969199180603, + "learning_rate": 9.970000000000001e-06, + "num_tokens": 690818.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.006, + "step": 1006 + }, + { + "loss": 0.0528, + "grad_norm": 0.8415541052818298, + "learning_rate": 9.960000000000001e-06, + "num_tokens": 691842.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.007, + "step": 1007 + }, + { + "loss": 0.0587, + "grad_norm": 1.1446748971939087, + "learning_rate": 9.950000000000001e-06, + "num_tokens": 692866.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.008, + "step": 1008 + }, + { + "loss": 0.0604, + "grad_norm": 0.802824079990387, + "learning_rate": 9.940000000000001e-06, + "num_tokens": 693890.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.009, + "step": 1009 + }, + { + "loss": 0.0381, + "grad_norm": 0.8150053024291992, + "learning_rate": 9.930000000000001e-06, + "num_tokens": 694493.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.01, + "step": 1010 + }, + { + "loss": 0.0084, + "grad_norm": 1.5208303928375244, + "learning_rate": 9.920000000000002e-06, + "num_tokens": 694675.0, + "mean_token_accuracy": 1.0, + "epoch": 1.011, + "step": 1011 + }, + { + "loss": 0.0675, + "grad_norm": 1.4418550729751587, + "learning_rate": 9.91e-06, + "num_tokens": 695278.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.012, + "step": 1012 + }, + { + "loss": 0.0415, + "grad_norm": 0.6883193850517273, + "learning_rate": 9.9e-06, + "num_tokens": 696302.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.013, + "step": 1013 + }, + { + "loss": 0.0595, + "grad_norm": 0.8060528039932251, + "learning_rate": 9.89e-06, + "num_tokens": 697326.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.014, + "step": 1014 + }, + { + "loss": 0.0441, + "grad_norm": 0.6391285061836243, + "learning_rate": 9.88e-06, + "num_tokens": 698350.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.015, + "step": 1015 + }, + { + "loss": 0.0587, + "grad_norm": 3.029737710952759, + "learning_rate": 9.87e-06, + "num_tokens": 698953.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.016, + "step": 1016 + }, + { + "loss": 0.0486, + "grad_norm": 0.8655040860176086, + "learning_rate": 9.86e-06, + "num_tokens": 699556.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.017, + "step": 1017 + }, + { + "loss": 0.0318, + "grad_norm": 0.7095951437950134, + "learning_rate": 9.85e-06, + "num_tokens": 700159.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.018, + "step": 1018 + }, + { + "loss": 0.0372, + "grad_norm": 0.971708357334137, + "learning_rate": 9.84e-06, + "num_tokens": 700762.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.019, + "step": 1019 + }, + { + "loss": 0.0489, + "grad_norm": 0.7406445145606995, + "learning_rate": 9.83e-06, + "num_tokens": 701786.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.02, + "step": 1020 + }, + { + "loss": 0.0102, + "grad_norm": 1.7808157205581665, + "learning_rate": 9.820000000000001e-06, + "num_tokens": 701968.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.021, + "step": 1021 + }, + { + "loss": 0.0688, + "grad_norm": 1.5178371667861938, + "learning_rate": 9.810000000000001e-06, + "num_tokens": 702571.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.022, + "step": 1022 + }, + { + "loss": 0.0527, + "grad_norm": 1.1028006076812744, + "learning_rate": 9.800000000000001e-06, + "num_tokens": 703174.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.023, + "step": 1023 + }, + { + "loss": 0.0495, + "grad_norm": 0.8541064858436584, + "learning_rate": 9.790000000000001e-06, + "num_tokens": 703777.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.024, + "step": 1024 + }, + { + "loss": 0.0321, + "grad_norm": 0.749095618724823, + "learning_rate": 9.780000000000001e-06, + "num_tokens": 704380.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.025, + "step": 1025 + }, + { + "loss": 0.0533, + "grad_norm": 1.0253041982650757, + "learning_rate": 9.770000000000001e-06, + "num_tokens": 704983.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.026, + "step": 1026 + }, + { + "loss": 0.0603, + "grad_norm": 1.2609119415283203, + "learning_rate": 9.760000000000001e-06, + "num_tokens": 705586.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.027, + "step": 1027 + }, + { + "loss": 0.0623, + "grad_norm": 1.2862604856491089, + "learning_rate": 9.75e-06, + "num_tokens": 706189.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.028, + "step": 1028 + }, + { + "loss": 0.0077, + "grad_norm": 1.3439050912857056, + "learning_rate": 9.74e-06, + "num_tokens": 706371.0, + "mean_token_accuracy": 1.0, + "epoch": 1.029, + "step": 1029 + }, + { + "loss": 0.0456, + "grad_norm": 0.8898230195045471, + "learning_rate": 9.73e-06, + "num_tokens": 706974.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.03, + "step": 1030 + }, + { + "loss": 0.0709, + "grad_norm": 1.024522304534912, + "learning_rate": 9.72e-06, + "num_tokens": 707998.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.031, + "step": 1031 + }, + { + "loss": 0.0339, + "grad_norm": 0.9764677286148071, + "learning_rate": 9.71e-06, + "num_tokens": 708601.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.032, + "step": 1032 + }, + { + "loss": 0.059, + "grad_norm": 1.010137677192688, + "learning_rate": 9.7e-06, + "num_tokens": 709204.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.033, + "step": 1033 + }, + { + "loss": 0.0467, + "grad_norm": 1.2479255199432373, + "learning_rate": 9.69e-06, + "num_tokens": 709807.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.034, + "step": 1034 + }, + { + "loss": 0.0652, + "grad_norm": 1.532749056816101, + "learning_rate": 9.68e-06, + "num_tokens": 710410.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.035, + "step": 1035 + }, + { + "loss": 0.0493, + "grad_norm": 0.7740268707275391, + "learning_rate": 9.67e-06, + "num_tokens": 711434.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.036, + "step": 1036 + }, + { + "loss": 0.0353, + "grad_norm": 0.9729663729667664, + "learning_rate": 9.66e-06, + "num_tokens": 712037.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.037, + "step": 1037 + }, + { + "loss": 0.0547, + "grad_norm": 1.164442539215088, + "learning_rate": 9.65e-06, + "num_tokens": 712640.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.038, + "step": 1038 + }, + { + "loss": 0.0069, + "grad_norm": 1.2468204498291016, + "learning_rate": 9.640000000000001e-06, + "num_tokens": 712822.0, + "mean_token_accuracy": 1.0, + "epoch": 1.039, + "step": 1039 + }, + { + "loss": 0.0506, + "grad_norm": 1.4751908779144287, + "learning_rate": 9.630000000000001e-06, + "num_tokens": 713425.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.04, + "step": 1040 + }, + { + "loss": 0.0373, + "grad_norm": 0.8496048450469971, + "learning_rate": 9.620000000000001e-06, + "num_tokens": 714028.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.041, + "step": 1041 + }, + { + "loss": 0.0344, + "grad_norm": 0.8480894565582275, + "learning_rate": 9.610000000000001e-06, + "num_tokens": 714631.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.042, + "step": 1042 + }, + { + "loss": 0.0538, + "grad_norm": 0.9738388061523438, + "learning_rate": 9.600000000000001e-06, + "num_tokens": 715655.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.043, + "step": 1043 + }, + { + "loss": 0.0066, + "grad_norm": 1.1477543115615845, + "learning_rate": 9.59e-06, + "num_tokens": 715837.0, + "mean_token_accuracy": 1.0, + "epoch": 1.044, + "step": 1044 + }, + { + "loss": 0.0405, + "grad_norm": 0.913650393486023, + "learning_rate": 9.58e-06, + "num_tokens": 716861.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.045, + "step": 1045 + }, + { + "loss": 0.0487, + "grad_norm": 0.9134669303894043, + "learning_rate": 9.57e-06, + "num_tokens": 717464.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.046, + "step": 1046 + }, + { + "loss": 0.0521, + "grad_norm": 1.0108141899108887, + "learning_rate": 9.56e-06, + "num_tokens": 718067.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.047, + "step": 1047 + }, + { + "loss": 0.0065, + "grad_norm": 1.1465944051742554, + "learning_rate": 9.55e-06, + "num_tokens": 718249.0, + "mean_token_accuracy": 1.0, + "epoch": 1.048, + "step": 1048 + }, + { + "loss": 0.0494, + "grad_norm": 0.7855933308601379, + "learning_rate": 9.54e-06, + "num_tokens": 719273.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.049, + "step": 1049 + }, + { + "loss": 0.0574, + "grad_norm": 1.1935304403305054, + "learning_rate": 9.53e-06, + "num_tokens": 719876.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.05, + "step": 1050 + }, + { + "loss": 0.0524, + "grad_norm": 1.244053840637207, + "learning_rate": 9.52e-06, + "num_tokens": 720479.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.051, + "step": 1051 + }, + { + "loss": 0.0394, + "grad_norm": 0.8121421933174133, + "learning_rate": 9.51e-06, + "num_tokens": 721503.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.052, + "step": 1052 + }, + { + "loss": 0.0587, + "grad_norm": 0.8952818512916565, + "learning_rate": 9.5e-06, + "num_tokens": 722527.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.053, + "step": 1053 + }, + { + "loss": 0.0523, + "grad_norm": 1.0233876705169678, + "learning_rate": 9.49e-06, + "num_tokens": 723130.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.054, + "step": 1054 + }, + { + "loss": 0.0059, + "grad_norm": 0.9962955713272095, + "learning_rate": 9.48e-06, + "num_tokens": 723312.0, + "mean_token_accuracy": 1.0, + "epoch": 1.055, + "step": 1055 + }, + { + "loss": 0.0063, + "grad_norm": 1.0562559366226196, + "learning_rate": 9.47e-06, + "num_tokens": 723494.0, + "mean_token_accuracy": 1.0, + "epoch": 1.056, + "step": 1056 + }, + { + "loss": 0.0057, + "grad_norm": 0.9193427562713623, + "learning_rate": 9.460000000000001e-06, + "num_tokens": 723676.0, + "mean_token_accuracy": 1.0, + "epoch": 1.057, + "step": 1057 + }, + { + "loss": 0.0349, + "grad_norm": 0.8626947999000549, + "learning_rate": 9.450000000000001e-06, + "num_tokens": 724279.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.058, + "step": 1058 + }, + { + "loss": 0.004, + "grad_norm": 0.589850902557373, + "learning_rate": 9.440000000000001e-06, + "num_tokens": 724461.0, + "mean_token_accuracy": 1.0, + "epoch": 1.059, + "step": 1059 + }, + { + "loss": 0.033, + "grad_norm": 0.9240136742591858, + "learning_rate": 9.43e-06, + "num_tokens": 725064.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.06, + "step": 1060 + }, + { + "loss": 0.0031, + "grad_norm": 0.410062700510025, + "learning_rate": 9.42e-06, + "num_tokens": 725246.0, + "mean_token_accuracy": 1.0, + "epoch": 1.061, + "step": 1061 + }, + { + "loss": 0.0569, + "grad_norm": 1.0026599168777466, + "learning_rate": 9.41e-06, + "num_tokens": 726270.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.062, + "step": 1062 + }, + { + "loss": 0.0319, + "grad_norm": 0.7115553617477417, + "learning_rate": 9.4e-06, + "num_tokens": 726873.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.063, + "step": 1063 + }, + { + "loss": 0.044, + "grad_norm": 1.1377477645874023, + "learning_rate": 9.39e-06, + "num_tokens": 727897.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.064, + "step": 1064 + }, + { + "loss": 0.0022, + "grad_norm": 0.2264242321252823, + "learning_rate": 9.38e-06, + "num_tokens": 728079.0, + "mean_token_accuracy": 1.0, + "epoch": 1.065, + "step": 1065 + }, + { + "loss": 0.0406, + "grad_norm": 1.1054085493087769, + "learning_rate": 9.370000000000002e-06, + "num_tokens": 728682.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.066, + "step": 1066 + }, + { + "loss": 0.0542, + "grad_norm": 1.080283522605896, + "learning_rate": 9.360000000000002e-06, + "num_tokens": 729706.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.067, + "step": 1067 + }, + { + "loss": 0.0355, + "grad_norm": 0.8702858686447144, + "learning_rate": 9.350000000000002e-06, + "num_tokens": 730309.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.068, + "step": 1068 + }, + { + "loss": 0.0023, + "grad_norm": 0.2787419557571411, + "learning_rate": 9.340000000000002e-06, + "num_tokens": 730491.0, + "mean_token_accuracy": 1.0, + "epoch": 1.069, + "step": 1069 + }, + { + "loss": 0.0539, + "grad_norm": 1.061450481414795, + "learning_rate": 9.33e-06, + "num_tokens": 731515.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.07, + "step": 1070 + }, + { + "loss": 0.0772, + "grad_norm": 1.567914605140686, + "learning_rate": 9.32e-06, + "num_tokens": 732539.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.071, + "step": 1071 + }, + { + "loss": 0.0493, + "grad_norm": 0.7363911271095276, + "learning_rate": 9.31e-06, + "num_tokens": 733563.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.072, + "step": 1072 + }, + { + "loss": 0.0561, + "grad_norm": 1.2731812000274658, + "learning_rate": 9.3e-06, + "num_tokens": 734166.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.073, + "step": 1073 + }, + { + "loss": 0.0673, + "grad_norm": 1.3731825351715088, + "learning_rate": 9.29e-06, + "num_tokens": 734769.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.074, + "step": 1074 + }, + { + "loss": 0.0666, + "grad_norm": 1.0484107732772827, + "learning_rate": 9.280000000000001e-06, + "num_tokens": 735793.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.075, + "step": 1075 + }, + { + "loss": 0.0472, + "grad_norm": 1.0025572776794434, + "learning_rate": 9.270000000000001e-06, + "num_tokens": 736817.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.076, + "step": 1076 + }, + { + "loss": 0.006, + "grad_norm": 1.1883853673934937, + "learning_rate": 9.260000000000001e-06, + "num_tokens": 736999.0, + "mean_token_accuracy": 1.0, + "epoch": 1.077, + "step": 1077 + }, + { + "loss": 0.0549, + "grad_norm": 1.1541094779968262, + "learning_rate": 9.250000000000001e-06, + "num_tokens": 737602.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.078, + "step": 1078 + }, + { + "loss": 0.0499, + "grad_norm": 0.9700387716293335, + "learning_rate": 9.240000000000001e-06, + "num_tokens": 738205.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.079, + "step": 1079 + }, + { + "loss": 0.0542, + "grad_norm": 0.8913364410400391, + "learning_rate": 9.230000000000001e-06, + "num_tokens": 738808.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.08, + "step": 1080 + }, + { + "loss": 0.048, + "grad_norm": 0.8343157172203064, + "learning_rate": 9.220000000000002e-06, + "num_tokens": 739411.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.081, + "step": 1081 + }, + { + "loss": 0.0492, + "grad_norm": 0.6102253794670105, + "learning_rate": 9.210000000000002e-06, + "num_tokens": 740435.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.082, + "step": 1082 + }, + { + "loss": 0.0502, + "grad_norm": 1.070359230041504, + "learning_rate": 9.200000000000002e-06, + "num_tokens": 741038.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.083, + "step": 1083 + }, + { + "loss": 0.0581, + "grad_norm": 0.858526885509491, + "learning_rate": 9.190000000000002e-06, + "num_tokens": 742062.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.084, + "step": 1084 + }, + { + "loss": 0.0502, + "grad_norm": 0.9168484210968018, + "learning_rate": 9.180000000000002e-06, + "num_tokens": 742665.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.085, + "step": 1085 + }, + { + "loss": 0.0583, + "grad_norm": 0.8808404207229614, + "learning_rate": 9.17e-06, + "num_tokens": 743689.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.086, + "step": 1086 + }, + { + "loss": 0.0642, + "grad_norm": 1.2995198965072632, + "learning_rate": 9.16e-06, + "num_tokens": 744292.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.087, + "step": 1087 + }, + { + "loss": 0.0133, + "grad_norm": 2.1493337154388428, + "learning_rate": 9.15e-06, + "num_tokens": 744474.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.088, + "step": 1088 + }, + { + "loss": 0.0379, + "grad_norm": 1.0027700662612915, + "learning_rate": 9.14e-06, + "num_tokens": 745077.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.089, + "step": 1089 + }, + { + "loss": 0.0659, + "grad_norm": 0.9788306951522827, + "learning_rate": 9.13e-06, + "num_tokens": 746101.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.09, + "step": 1090 + }, + { + "loss": 0.0616, + "grad_norm": 0.9896969795227051, + "learning_rate": 9.12e-06, + "num_tokens": 747125.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.091, + "step": 1091 + }, + { + "loss": 0.1079, + "grad_norm": 2.129412889480591, + "learning_rate": 9.110000000000001e-06, + "num_tokens": 748149.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.092, + "step": 1092 + }, + { + "loss": 0.0362, + "grad_norm": 0.836596667766571, + "learning_rate": 9.100000000000001e-06, + "num_tokens": 748752.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.093, + "step": 1093 + }, + { + "loss": 0.0564, + "grad_norm": 0.9442873001098633, + "learning_rate": 9.090000000000001e-06, + "num_tokens": 749355.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.094, + "step": 1094 + }, + { + "loss": 0.033, + "grad_norm": 0.8565213680267334, + "learning_rate": 9.080000000000001e-06, + "num_tokens": 749958.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.095, + "step": 1095 + }, + { + "loss": 0.0122, + "grad_norm": 2.0779123306274414, + "learning_rate": 9.070000000000001e-06, + "num_tokens": 750140.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.096, + "step": 1096 + }, + { + "loss": 0.0474, + "grad_norm": 0.8895683288574219, + "learning_rate": 9.060000000000001e-06, + "num_tokens": 750743.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.097, + "step": 1097 + }, + { + "loss": 0.0371, + "grad_norm": 0.8520296812057495, + "learning_rate": 9.050000000000001e-06, + "num_tokens": 751346.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.098, + "step": 1098 + }, + { + "loss": 0.0521, + "grad_norm": 1.0311665534973145, + "learning_rate": 9.040000000000002e-06, + "num_tokens": 751949.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.099, + "step": 1099 + }, + { + "loss": 0.0493, + "grad_norm": 0.7174288034439087, + "learning_rate": 9.030000000000002e-06, + "num_tokens": 752552.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.1, + "step": 1100 + }, + { + "loss": 0.0078, + "grad_norm": 1.336002230644226, + "learning_rate": 9.020000000000002e-06, + "num_tokens": 752734.0, + "mean_token_accuracy": 1.0, + "epoch": 1.101, + "step": 1101 + }, + { + "loss": 0.0563, + "grad_norm": 0.7885469794273376, + "learning_rate": 9.01e-06, + "num_tokens": 753758.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.102, + "step": 1102 + }, + { + "loss": 0.0509, + "grad_norm": 0.8089726567268372, + "learning_rate": 9e-06, + "num_tokens": 754361.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.103, + "step": 1103 + }, + { + "loss": 0.0655, + "grad_norm": 1.0928263664245605, + "learning_rate": 8.99e-06, + "num_tokens": 755385.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.104, + "step": 1104 + }, + { + "loss": 0.0477, + "grad_norm": 0.7860797643661499, + "learning_rate": 8.98e-06, + "num_tokens": 756409.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.105, + "step": 1105 + }, + { + "loss": 0.0457, + "grad_norm": 0.7514035105705261, + "learning_rate": 8.97e-06, + "num_tokens": 757433.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.106, + "step": 1106 + }, + { + "loss": 0.0521, + "grad_norm": 0.7597775459289551, + "learning_rate": 8.96e-06, + "num_tokens": 758036.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.107, + "step": 1107 + }, + { + "loss": 0.0361, + "grad_norm": 1.1093838214874268, + "learning_rate": 8.95e-06, + "num_tokens": 758639.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.108, + "step": 1108 + }, + { + "loss": 0.1442, + "grad_norm": 2.127009391784668, + "learning_rate": 8.94e-06, + "num_tokens": 759663.0, + "mean_token_accuracy": 0.951076328754425, + "epoch": 1.109, + "step": 1109 + }, + { + "loss": 0.0066, + "grad_norm": 1.1645936965942383, + "learning_rate": 8.930000000000001e-06, + "num_tokens": 759845.0, + "mean_token_accuracy": 1.0, + "epoch": 1.11, + "step": 1110 + }, + { + "loss": 0.1759, + "grad_norm": 2.889411687850952, + "learning_rate": 8.920000000000001e-06, + "num_tokens": 760448.0, + "mean_token_accuracy": 0.9500831961631775, + "epoch": 1.111, + "step": 1111 + }, + { + "loss": 0.0631, + "grad_norm": 0.8576507568359375, + "learning_rate": 8.910000000000001e-06, + "num_tokens": 761472.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.112, + "step": 1112 + }, + { + "loss": 0.033, + "grad_norm": 0.680837869644165, + "learning_rate": 8.900000000000001e-06, + "num_tokens": 762075.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.113, + "step": 1113 + }, + { + "loss": 0.0569, + "grad_norm": 0.7789044976234436, + "learning_rate": 8.890000000000001e-06, + "num_tokens": 763099.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.114, + "step": 1114 + }, + { + "loss": 0.0346, + "grad_norm": 0.7028644680976868, + "learning_rate": 8.880000000000001e-06, + "num_tokens": 763702.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.115, + "step": 1115 + }, + { + "loss": 0.0534, + "grad_norm": 0.8470257520675659, + "learning_rate": 8.870000000000001e-06, + "num_tokens": 764305.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.116, + "step": 1116 + }, + { + "loss": 0.0342, + "grad_norm": 0.7343347668647766, + "learning_rate": 8.860000000000002e-06, + "num_tokens": 764908.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.117, + "step": 1117 + }, + { + "loss": 0.0347, + "grad_norm": 0.8201417922973633, + "learning_rate": 8.85e-06, + "num_tokens": 765511.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.1179999999999999, + "step": 1118 + }, + { + "loss": 0.0616, + "grad_norm": 0.8209514617919922, + "learning_rate": 8.84e-06, + "num_tokens": 766535.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.119, + "step": 1119 + }, + { + "loss": 0.1121, + "grad_norm": 2.1913256645202637, + "learning_rate": 8.83e-06, + "num_tokens": 767559.0, + "mean_token_accuracy": 0.9569471478462219, + "epoch": 1.12, + "step": 1120 + }, + { + "loss": 0.034, + "grad_norm": 0.8490939736366272, + "learning_rate": 8.82e-06, + "num_tokens": 768162.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.121, + "step": 1121 + }, + { + "loss": 0.0572, + "grad_norm": 0.6898327469825745, + "learning_rate": 8.81e-06, + "num_tokens": 769186.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.1219999999999999, + "step": 1122 + }, + { + "loss": 0.0107, + "grad_norm": 1.8263050317764282, + "learning_rate": 8.8e-06, + "num_tokens": 769368.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.123, + "step": 1123 + }, + { + "loss": 0.0281, + "grad_norm": 0.6163520216941833, + "learning_rate": 8.79e-06, + "num_tokens": 769971.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 1.124, + "step": 1124 + }, + { + "loss": 0.0425, + "grad_norm": 0.7312502861022949, + "learning_rate": 8.78e-06, + "num_tokens": 770574.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.125, + "step": 1125 + }, + { + "loss": 0.0352, + "grad_norm": 0.9618499279022217, + "learning_rate": 8.77e-06, + "num_tokens": 771177.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.126, + "step": 1126 + }, + { + "loss": 0.0373, + "grad_norm": 0.9263796806335449, + "learning_rate": 8.76e-06, + "num_tokens": 771780.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.127, + "step": 1127 + }, + { + "loss": 0.0331, + "grad_norm": 0.862051784992218, + "learning_rate": 8.750000000000001e-06, + "num_tokens": 772383.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.1280000000000001, + "step": 1128 + }, + { + "loss": 0.008, + "grad_norm": 1.4848543405532837, + "learning_rate": 8.740000000000001e-06, + "num_tokens": 772565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.129, + "step": 1129 + }, + { + "loss": 0.1153, + "grad_norm": 1.4379287958145142, + "learning_rate": 8.730000000000001e-06, + "num_tokens": 773589.0, + "mean_token_accuracy": 0.9559686779975891, + "epoch": 1.13, + "step": 1130 + }, + { + "loss": 0.0338, + "grad_norm": 1.0212937593460083, + "learning_rate": 8.720000000000001e-06, + "num_tokens": 774192.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.131, + "step": 1131 + }, + { + "loss": 0.057, + "grad_norm": 1.1756787300109863, + "learning_rate": 8.710000000000001e-06, + "num_tokens": 775216.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.1320000000000001, + "step": 1132 + }, + { + "loss": 0.0066, + "grad_norm": 1.1858594417572021, + "learning_rate": 8.700000000000001e-06, + "num_tokens": 775398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.133, + "step": 1133 + }, + { + "loss": 0.0577, + "grad_norm": 0.945641815662384, + "learning_rate": 8.690000000000002e-06, + "num_tokens": 776001.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.134, + "step": 1134 + }, + { + "loss": 0.006, + "grad_norm": 1.0474095344543457, + "learning_rate": 8.68e-06, + "num_tokens": 776183.0, + "mean_token_accuracy": 1.0, + "epoch": 1.135, + "step": 1135 + }, + { + "loss": 0.0506, + "grad_norm": 1.064457654953003, + "learning_rate": 8.67e-06, + "num_tokens": 776786.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.1360000000000001, + "step": 1136 + }, + { + "loss": 0.0039, + "grad_norm": 0.6367634534835815, + "learning_rate": 8.66e-06, + "num_tokens": 776968.0, + "mean_token_accuracy": 1.0, + "epoch": 1.137, + "step": 1137 + }, + { + "loss": 0.052, + "grad_norm": 0.8969452381134033, + "learning_rate": 8.65e-06, + "num_tokens": 777992.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.138, + "step": 1138 + }, + { + "loss": 0.0432, + "grad_norm": 1.0857516527175903, + "learning_rate": 8.64e-06, + "num_tokens": 779016.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.139, + "step": 1139 + }, + { + "loss": 0.0607, + "grad_norm": 0.9557591676712036, + "learning_rate": 8.63e-06, + "num_tokens": 780040.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.1400000000000001, + "step": 1140 + }, + { + "loss": 0.0027, + "grad_norm": 0.344619482755661, + "learning_rate": 8.62e-06, + "num_tokens": 780222.0, + "mean_token_accuracy": 1.0, + "epoch": 1.141, + "step": 1141 + }, + { + "loss": 0.0469, + "grad_norm": 0.8497910499572754, + "learning_rate": 8.61e-06, + "num_tokens": 780825.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.142, + "step": 1142 + }, + { + "loss": 0.0025, + "grad_norm": 0.32798898220062256, + "learning_rate": 8.6e-06, + "num_tokens": 781007.0, + "mean_token_accuracy": 1.0, + "epoch": 1.143, + "step": 1143 + }, + { + "loss": 0.0501, + "grad_norm": 0.8057241439819336, + "learning_rate": 8.59e-06, + "num_tokens": 782031.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.144, + "step": 1144 + }, + { + "loss": 0.046, + "grad_norm": 0.953300952911377, + "learning_rate": 8.580000000000001e-06, + "num_tokens": 782634.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.145, + "step": 1145 + }, + { + "loss": 0.0027, + "grad_norm": 0.3377975523471832, + "learning_rate": 8.570000000000001e-06, + "num_tokens": 782816.0, + "mean_token_accuracy": 1.0, + "epoch": 1.146, + "step": 1146 + }, + { + "loss": 0.0609, + "grad_norm": 1.1738802194595337, + "learning_rate": 8.560000000000001e-06, + "num_tokens": 783419.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.147, + "step": 1147 + }, + { + "loss": 0.0338, + "grad_norm": 0.8058255314826965, + "learning_rate": 8.550000000000001e-06, + "num_tokens": 784022.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.148, + "step": 1148 + }, + { + "loss": 0.0393, + "grad_norm": 0.9772086143493652, + "learning_rate": 8.540000000000001e-06, + "num_tokens": 784625.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.149, + "step": 1149 + }, + { + "loss": 0.0682, + "grad_norm": 0.9261571168899536, + "learning_rate": 8.530000000000001e-06, + "num_tokens": 785649.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.15, + "step": 1150 + }, + { + "loss": 0.0632, + "grad_norm": 1.2219634056091309, + "learning_rate": 8.52e-06, + "num_tokens": 786252.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.151, + "step": 1151 + }, + { + "loss": 0.0316, + "grad_norm": 0.8042699098587036, + "learning_rate": 8.51e-06, + "num_tokens": 786855.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.152, + "step": 1152 + }, + { + "loss": 0.0365, + "grad_norm": 0.780549943447113, + "learning_rate": 8.5e-06, + "num_tokens": 787458.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.153, + "step": 1153 + }, + { + "loss": 0.0466, + "grad_norm": 0.8015241026878357, + "learning_rate": 8.49e-06, + "num_tokens": 788061.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.154, + "step": 1154 + }, + { + "loss": 0.0054, + "grad_norm": 0.963787317276001, + "learning_rate": 8.48e-06, + "num_tokens": 788243.0, + "mean_token_accuracy": 1.0, + "epoch": 1.155, + "step": 1155 + }, + { + "loss": 0.006, + "grad_norm": 1.0807055234909058, + "learning_rate": 8.47e-06, + "num_tokens": 788425.0, + "mean_token_accuracy": 1.0, + "epoch": 1.156, + "step": 1156 + }, + { + "loss": 0.0589, + "grad_norm": 1.0101304054260254, + "learning_rate": 8.46e-06, + "num_tokens": 789449.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.157, + "step": 1157 + }, + { + "loss": 0.0543, + "grad_norm": 0.8502178192138672, + "learning_rate": 8.45e-06, + "num_tokens": 790052.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.158, + "step": 1158 + }, + { + "loss": 0.0644, + "grad_norm": 1.153565526008606, + "learning_rate": 8.44e-06, + "num_tokens": 791076.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.159, + "step": 1159 + }, + { + "loss": 0.047, + "grad_norm": 1.0197230577468872, + "learning_rate": 8.43e-06, + "num_tokens": 791679.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.16, + "step": 1160 + }, + { + "loss": 0.0617, + "grad_norm": 0.944006621837616, + "learning_rate": 8.42e-06, + "num_tokens": 792703.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.161, + "step": 1161 + }, + { + "loss": 0.0569, + "grad_norm": 0.7898733019828796, + "learning_rate": 8.41e-06, + "num_tokens": 793727.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.162, + "step": 1162 + }, + { + "loss": 0.0546, + "grad_norm": 1.01863694190979, + "learning_rate": 8.400000000000001e-06, + "num_tokens": 794330.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.163, + "step": 1163 + }, + { + "loss": 0.0335, + "grad_norm": 0.905055820941925, + "learning_rate": 8.390000000000001e-06, + "num_tokens": 794933.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.164, + "step": 1164 + }, + { + "loss": 0.057, + "grad_norm": 1.0154438018798828, + "learning_rate": 8.380000000000001e-06, + "num_tokens": 795957.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.165, + "step": 1165 + }, + { + "loss": 0.0936, + "grad_norm": 1.4929184913635254, + "learning_rate": 8.370000000000001e-06, + "num_tokens": 796981.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.166, + "step": 1166 + }, + { + "loss": 0.0372, + "grad_norm": 0.8776635527610779, + "learning_rate": 8.36e-06, + "num_tokens": 797584.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.167, + "step": 1167 + }, + { + "loss": 0.0346, + "grad_norm": 0.842157244682312, + "learning_rate": 8.35e-06, + "num_tokens": 798187.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.168, + "step": 1168 + }, + { + "loss": 0.0547, + "grad_norm": 0.950747549533844, + "learning_rate": 8.34e-06, + "num_tokens": 798790.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.169, + "step": 1169 + }, + { + "loss": 0.0554, + "grad_norm": 0.9959940314292908, + "learning_rate": 8.33e-06, + "num_tokens": 799814.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.17, + "step": 1170 + }, + { + "loss": 0.0607, + "grad_norm": 1.4246129989624023, + "learning_rate": 8.32e-06, + "num_tokens": 800417.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.171, + "step": 1171 + }, + { + "loss": 0.0451, + "grad_norm": 0.8737262487411499, + "learning_rate": 8.31e-06, + "num_tokens": 801020.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.172, + "step": 1172 + }, + { + "loss": 0.0506, + "grad_norm": 0.747963547706604, + "learning_rate": 8.3e-06, + "num_tokens": 802044.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.173, + "step": 1173 + }, + { + "loss": 0.013, + "grad_norm": 1.951322078704834, + "learning_rate": 8.29e-06, + "num_tokens": 802226.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.174, + "step": 1174 + }, + { + "loss": 0.0392, + "grad_norm": 0.8089998960494995, + "learning_rate": 8.28e-06, + "num_tokens": 803250.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.175, + "step": 1175 + }, + { + "loss": 0.0379, + "grad_norm": 0.9302856922149658, + "learning_rate": 8.27e-06, + "num_tokens": 803853.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.176, + "step": 1176 + }, + { + "loss": 0.0588, + "grad_norm": 0.9273074865341187, + "learning_rate": 8.26e-06, + "num_tokens": 804877.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.177, + "step": 1177 + }, + { + "loss": 0.0452, + "grad_norm": 0.6838861107826233, + "learning_rate": 8.25e-06, + "num_tokens": 805901.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.178, + "step": 1178 + }, + { + "loss": 0.0132, + "grad_norm": 1.9745922088623047, + "learning_rate": 8.24e-06, + "num_tokens": 806083.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.179, + "step": 1179 + }, + { + "loss": 0.0107, + "grad_norm": 1.7368767261505127, + "learning_rate": 8.23e-06, + "num_tokens": 806265.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.18, + "step": 1180 + }, + { + "loss": 0.038, + "grad_norm": 0.9753760099411011, + "learning_rate": 8.220000000000001e-06, + "num_tokens": 806868.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.181, + "step": 1181 + }, + { + "loss": 0.0572, + "grad_norm": 0.8498497009277344, + "learning_rate": 8.210000000000001e-06, + "num_tokens": 807471.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.182, + "step": 1182 + }, + { + "loss": 0.0332, + "grad_norm": 0.7482154369354248, + "learning_rate": 8.2e-06, + "num_tokens": 808074.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.183, + "step": 1183 + }, + { + "loss": 0.0504, + "grad_norm": 1.1742054224014282, + "learning_rate": 8.19e-06, + "num_tokens": 809098.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.184, + "step": 1184 + }, + { + "loss": 0.0564, + "grad_norm": 1.028494954109192, + "learning_rate": 8.18e-06, + "num_tokens": 809701.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.185, + "step": 1185 + }, + { + "loss": 0.0565, + "grad_norm": 0.8841472268104553, + "learning_rate": 8.17e-06, + "num_tokens": 810725.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.186, + "step": 1186 + }, + { + "loss": 0.0425, + "grad_norm": 0.9280575513839722, + "learning_rate": 8.16e-06, + "num_tokens": 811328.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.187, + "step": 1187 + }, + { + "loss": 0.0391, + "grad_norm": 0.7514525651931763, + "learning_rate": 8.15e-06, + "num_tokens": 812352.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.188, + "step": 1188 + }, + { + "loss": 0.0062, + "grad_norm": 1.0712858438491821, + "learning_rate": 8.14e-06, + "num_tokens": 812534.0, + "mean_token_accuracy": 1.0, + "epoch": 1.189, + "step": 1189 + }, + { + "loss": 0.0575, + "grad_norm": 0.9916480779647827, + "learning_rate": 8.13e-06, + "num_tokens": 813558.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.19, + "step": 1190 + }, + { + "loss": 0.0456, + "grad_norm": 0.7496938705444336, + "learning_rate": 8.120000000000002e-06, + "num_tokens": 814582.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.191, + "step": 1191 + }, + { + "loss": 0.0545, + "grad_norm": 1.0540683269500732, + "learning_rate": 8.110000000000002e-06, + "num_tokens": 815185.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.192, + "step": 1192 + }, + { + "loss": 0.0409, + "grad_norm": 0.7678093314170837, + "learning_rate": 8.1e-06, + "num_tokens": 816209.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.193, + "step": 1193 + }, + { + "loss": 0.0573, + "grad_norm": 1.1160331964492798, + "learning_rate": 8.09e-06, + "num_tokens": 816812.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.194, + "step": 1194 + }, + { + "loss": 0.0063, + "grad_norm": 1.0925832986831665, + "learning_rate": 8.08e-06, + "num_tokens": 816994.0, + "mean_token_accuracy": 1.0, + "epoch": 1.195, + "step": 1195 + }, + { + "loss": 0.0598, + "grad_norm": 1.1617772579193115, + "learning_rate": 8.07e-06, + "num_tokens": 817597.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.196, + "step": 1196 + }, + { + "loss": 0.047, + "grad_norm": 0.9485524296760559, + "learning_rate": 8.06e-06, + "num_tokens": 818621.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.197, + "step": 1197 + }, + { + "loss": 0.0481, + "grad_norm": 0.8719391822814941, + "learning_rate": 8.050000000000001e-06, + "num_tokens": 819224.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.198, + "step": 1198 + }, + { + "loss": 0.0537, + "grad_norm": 1.0189318656921387, + "learning_rate": 8.040000000000001e-06, + "num_tokens": 819827.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.199, + "step": 1199 + }, + { + "loss": 0.0501, + "grad_norm": 1.06423819065094, + "learning_rate": 8.030000000000001e-06, + "num_tokens": 820430.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2, + "step": 1200 + }, + { + "loss": 0.0399, + "grad_norm": 1.01286780834198, + "learning_rate": 8.020000000000001e-06, + "num_tokens": 821033.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.201, + "step": 1201 + }, + { + "loss": 0.0595, + "grad_norm": 1.2328540086746216, + "learning_rate": 8.010000000000001e-06, + "num_tokens": 821636.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.202, + "step": 1202 + }, + { + "loss": 0.0499, + "grad_norm": 0.9263268709182739, + "learning_rate": 8.000000000000001e-06, + "num_tokens": 822239.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.203, + "step": 1203 + }, + { + "loss": 0.0063, + "grad_norm": 1.1311625242233276, + "learning_rate": 7.990000000000001e-06, + "num_tokens": 822421.0, + "mean_token_accuracy": 1.0, + "epoch": 1.204, + "step": 1204 + }, + { + "loss": 0.0566, + "grad_norm": 0.9658464193344116, + "learning_rate": 7.980000000000002e-06, + "num_tokens": 823445.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.205, + "step": 1205 + }, + { + "loss": 0.0518, + "grad_norm": 1.3028377294540405, + "learning_rate": 7.970000000000002e-06, + "num_tokens": 824048.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.206, + "step": 1206 + }, + { + "loss": 0.0064, + "grad_norm": 1.1466141939163208, + "learning_rate": 7.960000000000002e-06, + "num_tokens": 824230.0, + "mean_token_accuracy": 1.0, + "epoch": 1.207, + "step": 1207 + }, + { + "loss": 0.0612, + "grad_norm": 1.9032516479492188, + "learning_rate": 7.950000000000002e-06, + "num_tokens": 824833.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.208, + "step": 1208 + }, + { + "loss": 0.0466, + "grad_norm": 0.9508463740348816, + "learning_rate": 7.94e-06, + "num_tokens": 825436.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.209, + "step": 1209 + }, + { + "loss": 0.0446, + "grad_norm": 0.9122347831726074, + "learning_rate": 7.93e-06, + "num_tokens": 826039.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.21, + "step": 1210 + }, + { + "loss": 0.0302, + "grad_norm": 0.722285270690918, + "learning_rate": 7.92e-06, + "num_tokens": 826642.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.211, + "step": 1211 + }, + { + "loss": 0.0435, + "grad_norm": 0.8678917288780212, + "learning_rate": 7.91e-06, + "num_tokens": 827245.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.212, + "step": 1212 + }, + { + "loss": 0.0485, + "grad_norm": 1.0040737390518188, + "learning_rate": 7.9e-06, + "num_tokens": 827848.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.213, + "step": 1213 + }, + { + "loss": 0.0496, + "grad_norm": 0.9628919363021851, + "learning_rate": 7.89e-06, + "num_tokens": 828451.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.214, + "step": 1214 + }, + { + "loss": 0.0541, + "grad_norm": 1.1007357835769653, + "learning_rate": 7.88e-06, + "num_tokens": 829054.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.215, + "step": 1215 + }, + { + "loss": 0.0607, + "grad_norm": 1.0743118524551392, + "learning_rate": 7.870000000000001e-06, + "num_tokens": 830078.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.216, + "step": 1216 + }, + { + "loss": 0.0362, + "grad_norm": 0.8190649747848511, + "learning_rate": 7.860000000000001e-06, + "num_tokens": 830681.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.217, + "step": 1217 + }, + { + "loss": 0.0061, + "grad_norm": 1.0019081830978394, + "learning_rate": 7.850000000000001e-06, + "num_tokens": 830863.0, + "mean_token_accuracy": 1.0, + "epoch": 1.218, + "step": 1218 + }, + { + "loss": 0.0062, + "grad_norm": 1.036359429359436, + "learning_rate": 7.840000000000001e-06, + "num_tokens": 831045.0, + "mean_token_accuracy": 1.0, + "epoch": 1.219, + "step": 1219 + }, + { + "loss": 0.0595, + "grad_norm": 1.399138331413269, + "learning_rate": 7.830000000000001e-06, + "num_tokens": 831648.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.22, + "step": 1220 + }, + { + "loss": 0.0539, + "grad_norm": 0.9354347586631775, + "learning_rate": 7.820000000000001e-06, + "num_tokens": 832672.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.221, + "step": 1221 + }, + { + "loss": 0.0575, + "grad_norm": 1.4165191650390625, + "learning_rate": 7.810000000000001e-06, + "num_tokens": 833275.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.222, + "step": 1222 + }, + { + "loss": 0.0555, + "grad_norm": 1.097415804862976, + "learning_rate": 7.800000000000002e-06, + "num_tokens": 833878.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.223, + "step": 1223 + }, + { + "loss": 0.0422, + "grad_norm": 0.8333101272583008, + "learning_rate": 7.790000000000002e-06, + "num_tokens": 834902.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.224, + "step": 1224 + }, + { + "loss": 0.0393, + "grad_norm": 0.9399459958076477, + "learning_rate": 7.78e-06, + "num_tokens": 835505.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.225, + "step": 1225 + }, + { + "loss": 0.042, + "grad_norm": 0.7714658975601196, + "learning_rate": 7.77e-06, + "num_tokens": 836108.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.226, + "step": 1226 + }, + { + "loss": 0.0054, + "grad_norm": 0.939201831817627, + "learning_rate": 7.76e-06, + "num_tokens": 836290.0, + "mean_token_accuracy": 1.0, + "epoch": 1.227, + "step": 1227 + }, + { + "loss": 0.0522, + "grad_norm": 1.0808459520339966, + "learning_rate": 7.75e-06, + "num_tokens": 836893.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.228, + "step": 1228 + }, + { + "loss": 0.005, + "grad_norm": 0.9102663397789001, + "learning_rate": 7.74e-06, + "num_tokens": 837075.0, + "mean_token_accuracy": 1.0, + "epoch": 1.229, + "step": 1229 + }, + { + "loss": 0.0053, + "grad_norm": 0.9372754693031311, + "learning_rate": 7.73e-06, + "num_tokens": 837257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.23, + "step": 1230 + }, + { + "loss": 0.0502, + "grad_norm": 1.0474785566329956, + "learning_rate": 7.72e-06, + "num_tokens": 837860.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.231, + "step": 1231 + }, + { + "loss": 0.0519, + "grad_norm": 0.8802561163902283, + "learning_rate": 7.71e-06, + "num_tokens": 838463.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.232, + "step": 1232 + }, + { + "loss": 0.051, + "grad_norm": 1.0580495595932007, + "learning_rate": 7.7e-06, + "num_tokens": 839066.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.233, + "step": 1233 + }, + { + "loss": 0.0525, + "grad_norm": 1.1949350833892822, + "learning_rate": 7.690000000000001e-06, + "num_tokens": 839669.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.234, + "step": 1234 + }, + { + "loss": 0.0596, + "grad_norm": 0.7280122637748718, + "learning_rate": 7.680000000000001e-06, + "num_tokens": 840693.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.2349999999999999, + "step": 1235 + }, + { + "loss": 0.0483, + "grad_norm": 0.9881341457366943, + "learning_rate": 7.670000000000001e-06, + "num_tokens": 841296.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.236, + "step": 1236 + }, + { + "loss": 0.0351, + "grad_norm": 0.834136962890625, + "learning_rate": 7.660000000000001e-06, + "num_tokens": 841899.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.237, + "step": 1237 + }, + { + "loss": 0.0565, + "grad_norm": 1.0071011781692505, + "learning_rate": 7.650000000000001e-06, + "num_tokens": 842502.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.238, + "step": 1238 + }, + { + "loss": 0.0322, + "grad_norm": 1.0965189933776855, + "learning_rate": 7.640000000000001e-06, + "num_tokens": 843105.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.2389999999999999, + "step": 1239 + }, + { + "loss": 0.0318, + "grad_norm": 0.9356407523155212, + "learning_rate": 7.630000000000001e-06, + "num_tokens": 843708.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.24, + "step": 1240 + }, + { + "loss": 0.0553, + "grad_norm": 1.0970121622085571, + "learning_rate": 7.620000000000001e-06, + "num_tokens": 844732.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.241, + "step": 1241 + }, + { + "loss": 0.0544, + "grad_norm": 0.7283899188041687, + "learning_rate": 7.610000000000001e-06, + "num_tokens": 845756.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.242, + "step": 1242 + }, + { + "loss": 0.0511, + "grad_norm": 0.9140603542327881, + "learning_rate": 7.600000000000001e-06, + "num_tokens": 846359.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.2429999999999999, + "step": 1243 + }, + { + "loss": 0.0386, + "grad_norm": 0.8892003893852234, + "learning_rate": 7.590000000000001e-06, + "num_tokens": 846962.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.244, + "step": 1244 + }, + { + "loss": 0.0301, + "grad_norm": 0.6963894963264465, + "learning_rate": 7.58e-06, + "num_tokens": 847565.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.245, + "step": 1245 + }, + { + "loss": 0.0589, + "grad_norm": 0.8111267685890198, + "learning_rate": 7.57e-06, + "num_tokens": 848589.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.246, + "step": 1246 + }, + { + "loss": 0.0572, + "grad_norm": 1.1883255243301392, + "learning_rate": 7.5600000000000005e-06, + "num_tokens": 849192.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.2469999999999999, + "step": 1247 + }, + { + "loss": 0.0097, + "grad_norm": 1.6102426052093506, + "learning_rate": 7.5500000000000006e-06, + "num_tokens": 849374.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.248, + "step": 1248 + }, + { + "loss": 0.0468, + "grad_norm": 0.7692415118217468, + "learning_rate": 7.540000000000001e-06, + "num_tokens": 849977.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.249, + "step": 1249 + }, + { + "loss": 0.0584, + "grad_norm": 1.3470611572265625, + "learning_rate": 7.530000000000001e-06, + "num_tokens": 850580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.25, + "step": 1250 + }, + { + "loss": 0.01, + "grad_norm": 1.5853478908538818, + "learning_rate": 7.520000000000001e-06, + "num_tokens": 850762.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.251, + "step": 1251 + }, + { + "loss": 0.0481, + "grad_norm": 0.8128389716148376, + "learning_rate": 7.510000000000001e-06, + "num_tokens": 851365.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.252, + "step": 1252 + }, + { + "loss": 0.0322, + "grad_norm": 0.7977066040039062, + "learning_rate": 7.500000000000001e-06, + "num_tokens": 851968.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2530000000000001, + "step": 1253 + }, + { + "loss": 0.0544, + "grad_norm": 0.9201311469078064, + "learning_rate": 7.49e-06, + "num_tokens": 852571.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.254, + "step": 1254 + }, + { + "loss": 0.0296, + "grad_norm": 0.6444401144981384, + "learning_rate": 7.48e-06, + "num_tokens": 853174.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.255, + "step": 1255 + }, + { + "loss": 0.0629, + "grad_norm": 0.9161770939826965, + "learning_rate": 7.4700000000000005e-06, + "num_tokens": 854198.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.256, + "step": 1256 + }, + { + "loss": 0.0067, + "grad_norm": 1.1707040071487427, + "learning_rate": 7.4600000000000006e-06, + "num_tokens": 854380.0, + "mean_token_accuracy": 1.0, + "epoch": 1.2570000000000001, + "step": 1257 + }, + { + "loss": 0.05, + "grad_norm": 1.0465596914291382, + "learning_rate": 7.450000000000001e-06, + "num_tokens": 854983.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.258, + "step": 1258 + }, + { + "loss": 0.0061, + "grad_norm": 1.0755349397659302, + "learning_rate": 7.440000000000001e-06, + "num_tokens": 855165.0, + "mean_token_accuracy": 1.0, + "epoch": 1.259, + "step": 1259 + }, + { + "loss": 0.0587, + "grad_norm": 1.1517828702926636, + "learning_rate": 7.430000000000001e-06, + "num_tokens": 855768.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.26, + "step": 1260 + }, + { + "loss": 0.0567, + "grad_norm": 0.894393265247345, + "learning_rate": 7.420000000000001e-06, + "num_tokens": 856792.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.2610000000000001, + "step": 1261 + }, + { + "loss": 0.004, + "grad_norm": 0.625373899936676, + "learning_rate": 7.41e-06, + "num_tokens": 856974.0, + "mean_token_accuracy": 1.0, + "epoch": 1.262, + "step": 1262 + }, + { + "loss": 0.0613, + "grad_norm": 1.038960337638855, + "learning_rate": 7.4e-06, + "num_tokens": 857998.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.263, + "step": 1263 + }, + { + "loss": 0.0648, + "grad_norm": 0.9525636434555054, + "learning_rate": 7.39e-06, + "num_tokens": 859022.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.264, + "step": 1264 + }, + { + "loss": 0.036, + "grad_norm": 0.9128121733665466, + "learning_rate": 7.3800000000000005e-06, + "num_tokens": 859625.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.2650000000000001, + "step": 1265 + }, + { + "loss": 0.0565, + "grad_norm": 1.1845719814300537, + "learning_rate": 7.370000000000001e-06, + "num_tokens": 860228.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.266, + "step": 1266 + }, + { + "loss": 0.0655, + "grad_norm": 1.0292823314666748, + "learning_rate": 7.360000000000001e-06, + "num_tokens": 861252.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.267, + "step": 1267 + }, + { + "loss": 0.0493, + "grad_norm": 1.01980721950531, + "learning_rate": 7.350000000000001e-06, + "num_tokens": 861855.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.268, + "step": 1268 + }, + { + "loss": 0.1008, + "grad_norm": 1.9880106449127197, + "learning_rate": 7.340000000000001e-06, + "num_tokens": 862879.0, + "mean_token_accuracy": 0.9598825573921204, + "epoch": 1.2690000000000001, + "step": 1269 + }, + { + "loss": 0.0461, + "grad_norm": 0.750867486000061, + "learning_rate": 7.33e-06, + "num_tokens": 863903.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.27, + "step": 1270 + }, + { + "loss": 0.0514, + "grad_norm": 0.8738319277763367, + "learning_rate": 7.32e-06, + "num_tokens": 864506.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.271, + "step": 1271 + }, + { + "loss": 0.1093, + "grad_norm": 2.573967933654785, + "learning_rate": 7.31e-06, + "num_tokens": 865109.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.272, + "step": 1272 + }, + { + "loss": 0.0375, + "grad_norm": 0.7688126564025879, + "learning_rate": 7.3e-06, + "num_tokens": 865712.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2730000000000001, + "step": 1273 + }, + { + "loss": 0.0059, + "grad_norm": 0.9865520000457764, + "learning_rate": 7.2900000000000005e-06, + "num_tokens": 865894.0, + "mean_token_accuracy": 1.0, + "epoch": 1.274, + "step": 1274 + }, + { + "loss": 0.0326, + "grad_norm": 0.6999955773353577, + "learning_rate": 7.280000000000001e-06, + "num_tokens": 866497.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.275, + "step": 1275 + }, + { + "loss": 0.0525, + "grad_norm": 0.8453314900398254, + "learning_rate": 7.270000000000001e-06, + "num_tokens": 867521.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.276, + "step": 1276 + }, + { + "loss": 0.0537, + "grad_norm": 0.8030353784561157, + "learning_rate": 7.260000000000001e-06, + "num_tokens": 868545.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.2770000000000001, + "step": 1277 + }, + { + "loss": 0.0539, + "grad_norm": 1.3158842325210571, + "learning_rate": 7.25e-06, + "num_tokens": 869148.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.278, + "step": 1278 + }, + { + "loss": 0.0345, + "grad_norm": 0.7475882172584534, + "learning_rate": 7.24e-06, + "num_tokens": 869751.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.279, + "step": 1279 + }, + { + "loss": 0.0326, + "grad_norm": 0.7297677397727966, + "learning_rate": 7.23e-06, + "num_tokens": 870354.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.28, + "step": 1280 + }, + { + "loss": 0.0372, + "grad_norm": 0.9404818415641785, + "learning_rate": 7.22e-06, + "num_tokens": 870957.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.2810000000000001, + "step": 1281 + }, + { + "loss": 0.0079, + "grad_norm": 1.2922416925430298, + "learning_rate": 7.2100000000000004e-06, + "num_tokens": 871139.0, + "mean_token_accuracy": 1.0, + "epoch": 1.282, + "step": 1282 + }, + { + "loss": 0.0544, + "grad_norm": 0.7138064503669739, + "learning_rate": 7.2000000000000005e-06, + "num_tokens": 872163.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.283, + "step": 1283 + }, + { + "loss": 0.0491, + "grad_norm": 0.9901664853096008, + "learning_rate": 7.190000000000001e-06, + "num_tokens": 872766.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.284, + "step": 1284 + }, + { + "loss": 0.0515, + "grad_norm": 0.8993235230445862, + "learning_rate": 7.180000000000001e-06, + "num_tokens": 873790.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.285, + "step": 1285 + }, + { + "loss": 0.0544, + "grad_norm": 1.186691164970398, + "learning_rate": 7.17e-06, + "num_tokens": 874393.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.286, + "step": 1286 + }, + { + "loss": 0.057, + "grad_norm": 0.7776333689689636, + "learning_rate": 7.16e-06, + "num_tokens": 875417.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.287, + "step": 1287 + }, + { + "loss": 0.06, + "grad_norm": 0.8132596015930176, + "learning_rate": 7.15e-06, + "num_tokens": 876441.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.288, + "step": 1288 + }, + { + "loss": 0.0471, + "grad_norm": 0.9748024940490723, + "learning_rate": 7.14e-06, + "num_tokens": 877044.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.289, + "step": 1289 + }, + { + "loss": 0.0507, + "grad_norm": 0.8249137997627258, + "learning_rate": 7.13e-06, + "num_tokens": 877647.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.29, + "step": 1290 + }, + { + "loss": 0.0604, + "grad_norm": 0.9042787551879883, + "learning_rate": 7.1200000000000004e-06, + "num_tokens": 878671.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.291, + "step": 1291 + }, + { + "loss": 0.0709, + "grad_norm": 1.0456619262695312, + "learning_rate": 7.1100000000000005e-06, + "num_tokens": 879695.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.292, + "step": 1292 + }, + { + "loss": 0.0509, + "grad_norm": 1.0809437036514282, + "learning_rate": 7.100000000000001e-06, + "num_tokens": 880298.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.293, + "step": 1293 + }, + { + "loss": 0.0466, + "grad_norm": 0.8374451398849487, + "learning_rate": 7.09e-06, + "num_tokens": 880901.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.294, + "step": 1294 + }, + { + "loss": 0.0396, + "grad_norm": 0.6764081716537476, + "learning_rate": 7.08e-06, + "num_tokens": 881925.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.295, + "step": 1295 + }, + { + "loss": 0.047, + "grad_norm": 0.7990655899047852, + "learning_rate": 7.07e-06, + "num_tokens": 882528.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.296, + "step": 1296 + }, + { + "loss": 0.0458, + "grad_norm": 0.8706727027893066, + "learning_rate": 7.06e-06, + "num_tokens": 883131.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.297, + "step": 1297 + }, + { + "loss": 0.0598, + "grad_norm": 1.1233471632003784, + "learning_rate": 7.05e-06, + "num_tokens": 883734.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.298, + "step": 1298 + }, + { + "loss": 0.0504, + "grad_norm": 0.7818260192871094, + "learning_rate": 7.04e-06, + "num_tokens": 884758.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.299, + "step": 1299 + }, + { + "loss": 0.0468, + "grad_norm": 1.0131233930587769, + "learning_rate": 7.0300000000000005e-06, + "num_tokens": 885361.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.3, + "step": 1300 + }, + { + "loss": 0.0124, + "grad_norm": 1.7857097387313843, + "learning_rate": 7.0200000000000006e-06, + "num_tokens": 885543.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.301, + "step": 1301 + }, + { + "loss": 0.0632, + "grad_norm": 0.9438235759735107, + "learning_rate": 7.01e-06, + "num_tokens": 886567.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.302, + "step": 1302 + }, + { + "loss": 0.011, + "grad_norm": 1.6502615213394165, + "learning_rate": 7e-06, + "num_tokens": 886749.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.303, + "step": 1303 + }, + { + "loss": 0.0465, + "grad_norm": 0.70659339427948, + "learning_rate": 6.99e-06, + "num_tokens": 887352.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.304, + "step": 1304 + }, + { + "loss": 0.0471, + "grad_norm": 0.7495580911636353, + "learning_rate": 6.98e-06, + "num_tokens": 887955.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.305, + "step": 1305 + }, + { + "loss": 0.0561, + "grad_norm": 0.8991160988807678, + "learning_rate": 6.97e-06, + "num_tokens": 888558.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.306, + "step": 1306 + }, + { + "loss": 0.0516, + "grad_norm": 1.163590669631958, + "learning_rate": 6.96e-06, + "num_tokens": 889161.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.307, + "step": 1307 + }, + { + "loss": 0.0524, + "grad_norm": 1.1685197353363037, + "learning_rate": 6.95e-06, + "num_tokens": 890185.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.308, + "step": 1308 + }, + { + "loss": 0.0619, + "grad_norm": 0.846095621585846, + "learning_rate": 6.9400000000000005e-06, + "num_tokens": 891209.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.309, + "step": 1309 + }, + { + "loss": 0.0447, + "grad_norm": 0.8409944176673889, + "learning_rate": 6.93e-06, + "num_tokens": 891812.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.31, + "step": 1310 + }, + { + "loss": 0.0538, + "grad_norm": 1.0099889039993286, + "learning_rate": 6.92e-06, + "num_tokens": 892836.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.311, + "step": 1311 + }, + { + "loss": 0.0549, + "grad_norm": 0.7870184779167175, + "learning_rate": 6.91e-06, + "num_tokens": 893860.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.312, + "step": 1312 + }, + { + "loss": 0.0507, + "grad_norm": 0.7824894785881042, + "learning_rate": 6.9e-06, + "num_tokens": 894884.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.313, + "step": 1313 + }, + { + "loss": 0.0293, + "grad_norm": 0.7371014356613159, + "learning_rate": 6.89e-06, + "num_tokens": 895487.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.314, + "step": 1314 + }, + { + "loss": 0.0673, + "grad_norm": 1.400519609451294, + "learning_rate": 6.88e-06, + "num_tokens": 896511.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.315, + "step": 1315 + }, + { + "loss": 0.0508, + "grad_norm": 0.8923640251159668, + "learning_rate": 6.870000000000001e-06, + "num_tokens": 897114.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.316, + "step": 1316 + }, + { + "loss": 0.1169, + "grad_norm": 1.8647280931472778, + "learning_rate": 6.860000000000001e-06, + "num_tokens": 898138.0, + "mean_token_accuracy": 0.9589040875434875, + "epoch": 1.317, + "step": 1317 + }, + { + "loss": 0.0091, + "grad_norm": 1.4598783254623413, + "learning_rate": 6.850000000000001e-06, + "num_tokens": 898320.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.318, + "step": 1318 + }, + { + "loss": 0.0641, + "grad_norm": 1.6538336277008057, + "learning_rate": 6.8400000000000014e-06, + "num_tokens": 898923.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.319, + "step": 1319 + }, + { + "loss": 0.039, + "grad_norm": 0.976009726524353, + "learning_rate": 6.830000000000001e-06, + "num_tokens": 899526.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.32, + "step": 1320 + }, + { + "loss": 0.0323, + "grad_norm": 0.9658445715904236, + "learning_rate": 6.820000000000001e-06, + "num_tokens": 900129.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.321, + "step": 1321 + }, + { + "loss": 0.0093, + "grad_norm": 1.460464596748352, + "learning_rate": 6.810000000000001e-06, + "num_tokens": 900311.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.322, + "step": 1322 + }, + { + "loss": 0.0592, + "grad_norm": 0.9687524437904358, + "learning_rate": 6.800000000000001e-06, + "num_tokens": 901335.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.323, + "step": 1323 + }, + { + "loss": 0.0552, + "grad_norm": 0.7118176817893982, + "learning_rate": 6.790000000000001e-06, + "num_tokens": 902359.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.324, + "step": 1324 + }, + { + "loss": 0.0549, + "grad_norm": 0.6859893202781677, + "learning_rate": 6.780000000000001e-06, + "num_tokens": 903383.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.325, + "step": 1325 + }, + { + "loss": 0.0073, + "grad_norm": 1.21769380569458, + "learning_rate": 6.770000000000001e-06, + "num_tokens": 903565.0, + "mean_token_accuracy": 1.0, + "epoch": 1.326, + "step": 1326 + }, + { + "loss": 0.0595, + "grad_norm": 0.9237185716629028, + "learning_rate": 6.760000000000001e-06, + "num_tokens": 904589.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.327, + "step": 1327 + }, + { + "loss": 0.055, + "grad_norm": 0.8631585240364075, + "learning_rate": 6.750000000000001e-06, + "num_tokens": 905613.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.328, + "step": 1328 + }, + { + "loss": 0.0595, + "grad_norm": 1.1469013690948486, + "learning_rate": 6.740000000000001e-06, + "num_tokens": 906216.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.329, + "step": 1329 + }, + { + "loss": 0.0066, + "grad_norm": 1.1101781129837036, + "learning_rate": 6.730000000000001e-06, + "num_tokens": 906398.0, + "mean_token_accuracy": 1.0, + "epoch": 1.33, + "step": 1330 + }, + { + "loss": 0.0598, + "grad_norm": 0.9575704336166382, + "learning_rate": 6.720000000000001e-06, + "num_tokens": 907422.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.331, + "step": 1331 + }, + { + "loss": 0.0584, + "grad_norm": 1.1068741083145142, + "learning_rate": 6.710000000000001e-06, + "num_tokens": 908025.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.332, + "step": 1332 + }, + { + "loss": 0.0558, + "grad_norm": 0.8627570271492004, + "learning_rate": 6.700000000000001e-06, + "num_tokens": 909049.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.333, + "step": 1333 + }, + { + "loss": 0.0055, + "grad_norm": 0.9423507452011108, + "learning_rate": 6.690000000000001e-06, + "num_tokens": 909231.0, + "mean_token_accuracy": 1.0, + "epoch": 1.334, + "step": 1334 + }, + { + "loss": 0.0363, + "grad_norm": 0.8017407655715942, + "learning_rate": 6.680000000000001e-06, + "num_tokens": 909834.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.335, + "step": 1335 + }, + { + "loss": 0.066, + "grad_norm": 1.1265746355056763, + "learning_rate": 6.6700000000000005e-06, + "num_tokens": 910858.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.336, + "step": 1336 + }, + { + "loss": 0.0996, + "grad_norm": 2.5847702026367188, + "learning_rate": 6.660000000000001e-06, + "num_tokens": 911461.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.337, + "step": 1337 + }, + { + "loss": 0.0559, + "grad_norm": 0.8754604458808899, + "learning_rate": 6.650000000000001e-06, + "num_tokens": 912064.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.338, + "step": 1338 + }, + { + "loss": 0.0636, + "grad_norm": 0.9931411743164062, + "learning_rate": 6.640000000000001e-06, + "num_tokens": 913088.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.339, + "step": 1339 + }, + { + "loss": 0.0555, + "grad_norm": 1.157425880432129, + "learning_rate": 6.630000000000001e-06, + "num_tokens": 913691.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.34, + "step": 1340 + }, + { + "loss": 0.0495, + "grad_norm": 0.7949211001396179, + "learning_rate": 6.620000000000001e-06, + "num_tokens": 914294.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.341, + "step": 1341 + }, + { + "loss": 0.0557, + "grad_norm": 0.7969265580177307, + "learning_rate": 6.610000000000001e-06, + "num_tokens": 915318.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.342, + "step": 1342 + }, + { + "loss": 0.0453, + "grad_norm": 0.9040102958679199, + "learning_rate": 6.600000000000001e-06, + "num_tokens": 915921.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.343, + "step": 1343 + }, + { + "loss": 0.0884, + "grad_norm": 1.350819706916809, + "learning_rate": 6.5900000000000004e-06, + "num_tokens": 916945.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.3439999999999999, + "step": 1344 + }, + { + "loss": 0.0554, + "grad_norm": 1.2525602579116821, + "learning_rate": 6.5800000000000005e-06, + "num_tokens": 917548.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.345, + "step": 1345 + }, + { + "loss": 0.0058, + "grad_norm": 0.9376251697540283, + "learning_rate": 6.570000000000001e-06, + "num_tokens": 917730.0, + "mean_token_accuracy": 1.0, + "epoch": 1.346, + "step": 1346 + }, + { + "loss": 0.0063, + "grad_norm": 1.013806700706482, + "learning_rate": 6.560000000000001e-06, + "num_tokens": 917912.0, + "mean_token_accuracy": 1.0, + "epoch": 1.347, + "step": 1347 + }, + { + "loss": 0.0503, + "grad_norm": 1.1062885522842407, + "learning_rate": 6.550000000000001e-06, + "num_tokens": 918515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.3479999999999999, + "step": 1348 + }, + { + "loss": 0.0578, + "grad_norm": 0.8600636720657349, + "learning_rate": 6.540000000000001e-06, + "num_tokens": 919539.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.349, + "step": 1349 + }, + { + "loss": 0.0387, + "grad_norm": 0.9621451497077942, + "learning_rate": 6.530000000000001e-06, + "num_tokens": 920142.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.35, + "step": 1350 + }, + { + "loss": 0.0349, + "grad_norm": 0.8627477288246155, + "learning_rate": 6.520000000000001e-06, + "num_tokens": 920745.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.351, + "step": 1351 + }, + { + "loss": 0.0854, + "grad_norm": 1.6566712856292725, + "learning_rate": 6.51e-06, + "num_tokens": 921769.0, + "mean_token_accuracy": 0.9628180265426636, + "epoch": 1.3519999999999999, + "step": 1352 + }, + { + "loss": 0.0449, + "grad_norm": 0.7205953598022461, + "learning_rate": 6.5000000000000004e-06, + "num_tokens": 922793.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.353, + "step": 1353 + }, + { + "loss": 0.0332, + "grad_norm": 0.8109530806541443, + "learning_rate": 6.4900000000000005e-06, + "num_tokens": 923396.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.354, + "step": 1354 + }, + { + "loss": 0.0056, + "grad_norm": 0.9386361837387085, + "learning_rate": 6.480000000000001e-06, + "num_tokens": 923578.0, + "mean_token_accuracy": 1.0, + "epoch": 1.355, + "step": 1355 + }, + { + "loss": 0.0366, + "grad_norm": 0.8277124762535095, + "learning_rate": 6.470000000000001e-06, + "num_tokens": 924181.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.3559999999999999, + "step": 1356 + }, + { + "loss": 0.0456, + "grad_norm": 1.019851803779602, + "learning_rate": 6.460000000000001e-06, + "num_tokens": 924784.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.357, + "step": 1357 + }, + { + "loss": 0.0054, + "grad_norm": 0.8904734253883362, + "learning_rate": 6.450000000000001e-06, + "num_tokens": 924966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.358, + "step": 1358 + }, + { + "loss": 0.0544, + "grad_norm": 0.9087153673171997, + "learning_rate": 6.440000000000001e-06, + "num_tokens": 925569.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.359, + "step": 1359 + }, + { + "loss": 0.0392, + "grad_norm": 0.8872094750404358, + "learning_rate": 6.43e-06, + "num_tokens": 926172.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.3599999999999999, + "step": 1360 + }, + { + "loss": 0.0504, + "grad_norm": 0.6818045377731323, + "learning_rate": 6.42e-06, + "num_tokens": 927196.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.361, + "step": 1361 + }, + { + "loss": 0.0492, + "grad_norm": 1.2012197971343994, + "learning_rate": 6.4100000000000005e-06, + "num_tokens": 927799.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.362, + "step": 1362 + }, + { + "loss": 0.052, + "grad_norm": 0.7941383719444275, + "learning_rate": 6.4000000000000006e-06, + "num_tokens": 928823.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.363, + "step": 1363 + }, + { + "loss": 0.0337, + "grad_norm": 0.8198418617248535, + "learning_rate": 6.390000000000001e-06, + "num_tokens": 929426.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.3639999999999999, + "step": 1364 + }, + { + "loss": 0.0499, + "grad_norm": 0.9409139156341553, + "learning_rate": 6.380000000000001e-06, + "num_tokens": 930029.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.365, + "step": 1365 + }, + { + "loss": 0.0056, + "grad_norm": 0.9511061906814575, + "learning_rate": 6.370000000000001e-06, + "num_tokens": 930211.0, + "mean_token_accuracy": 1.0, + "epoch": 1.366, + "step": 1366 + }, + { + "loss": 0.046, + "grad_norm": 1.0836243629455566, + "learning_rate": 6.360000000000001e-06, + "num_tokens": 930814.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.367, + "step": 1367 + }, + { + "loss": 0.0457, + "grad_norm": 0.8588566184043884, + "learning_rate": 6.35e-06, + "num_tokens": 931838.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.3679999999999999, + "step": 1368 + }, + { + "loss": 0.034, + "grad_norm": 0.7359830141067505, + "learning_rate": 6.34e-06, + "num_tokens": 932441.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.369, + "step": 1369 + }, + { + "loss": 0.0541, + "grad_norm": 1.353061318397522, + "learning_rate": 6.33e-06, + "num_tokens": 933044.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.37, + "step": 1370 + }, + { + "loss": 0.0498, + "grad_norm": 1.1353765726089478, + "learning_rate": 6.3200000000000005e-06, + "num_tokens": 933647.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.371, + "step": 1371 + }, + { + "loss": 0.0054, + "grad_norm": 0.9213358759880066, + "learning_rate": 6.3100000000000006e-06, + "num_tokens": 933829.0, + "mean_token_accuracy": 1.0, + "epoch": 1.3719999999999999, + "step": 1372 + }, + { + "loss": 0.0595, + "grad_norm": 1.0413357019424438, + "learning_rate": 6.300000000000001e-06, + "num_tokens": 934853.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.373, + "step": 1373 + }, + { + "loss": 0.0501, + "grad_norm": 0.8945645689964294, + "learning_rate": 6.290000000000001e-06, + "num_tokens": 935456.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.374, + "step": 1374 + }, + { + "loss": 0.0982, + "grad_norm": 1.3816639184951782, + "learning_rate": 6.280000000000001e-06, + "num_tokens": 936480.0, + "mean_token_accuracy": 0.9637964963912964, + "epoch": 1.375, + "step": 1375 + }, + { + "loss": 0.0592, + "grad_norm": 0.8560639023780823, + "learning_rate": 6.27e-06, + "num_tokens": 937504.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.376, + "step": 1376 + }, + { + "loss": 0.0542, + "grad_norm": 1.0596678256988525, + "learning_rate": 6.26e-06, + "num_tokens": 938107.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.377, + "step": 1377 + }, + { + "loss": 0.0616, + "grad_norm": 1.3990719318389893, + "learning_rate": 6.25e-06, + "num_tokens": 939131.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.3780000000000001, + "step": 1378 + }, + { + "loss": 0.0487, + "grad_norm": 0.9481455087661743, + "learning_rate": 6.24e-06, + "num_tokens": 939734.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.379, + "step": 1379 + }, + { + "loss": 0.0586, + "grad_norm": 0.9030970335006714, + "learning_rate": 6.2300000000000005e-06, + "num_tokens": 940758.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.38, + "step": 1380 + }, + { + "loss": 0.0461, + "grad_norm": 0.8725113272666931, + "learning_rate": 6.220000000000001e-06, + "num_tokens": 941361.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.381, + "step": 1381 + }, + { + "loss": 0.0328, + "grad_norm": 0.7602605819702148, + "learning_rate": 6.210000000000001e-06, + "num_tokens": 941964.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.3820000000000001, + "step": 1382 + }, + { + "loss": 0.0389, + "grad_norm": 0.8838405013084412, + "learning_rate": 6.200000000000001e-06, + "num_tokens": 942988.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.383, + "step": 1383 + }, + { + "loss": 0.0453, + "grad_norm": 0.9330336451530457, + "learning_rate": 6.190000000000001e-06, + "num_tokens": 943591.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.384, + "step": 1384 + }, + { + "loss": 0.0556, + "grad_norm": 0.8908242583274841, + "learning_rate": 6.18e-06, + "num_tokens": 944615.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.385, + "step": 1385 + }, + { + "loss": 0.0366, + "grad_norm": 0.6753963232040405, + "learning_rate": 6.17e-06, + "num_tokens": 945639.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.3860000000000001, + "step": 1386 + }, + { + "loss": 0.0567, + "grad_norm": 1.027570128440857, + "learning_rate": 6.16e-06, + "num_tokens": 946663.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.387, + "step": 1387 + }, + { + "loss": 0.031, + "grad_norm": 0.7927929162979126, + "learning_rate": 6.15e-06, + "num_tokens": 947266.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.388, + "step": 1388 + }, + { + "loss": 0.0588, + "grad_norm": 1.1400188207626343, + "learning_rate": 6.1400000000000005e-06, + "num_tokens": 947869.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.389, + "step": 1389 + }, + { + "loss": 0.054, + "grad_norm": 0.7212454676628113, + "learning_rate": 6.130000000000001e-06, + "num_tokens": 948893.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.3900000000000001, + "step": 1390 + }, + { + "loss": 0.0125, + "grad_norm": 1.9306414127349854, + "learning_rate": 6.120000000000001e-06, + "num_tokens": 949075.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.391, + "step": 1391 + }, + { + "loss": 0.0132, + "grad_norm": 1.9667447805404663, + "learning_rate": 6.110000000000001e-06, + "num_tokens": 949257.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.392, + "step": 1392 + }, + { + "loss": 0.0455, + "grad_norm": 0.7732621431350708, + "learning_rate": 6.1e-06, + "num_tokens": 950281.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.393, + "step": 1393 + }, + { + "loss": 0.0593, + "grad_norm": 1.3347744941711426, + "learning_rate": 6.09e-06, + "num_tokens": 950884.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.3940000000000001, + "step": 1394 + }, + { + "loss": 0.0312, + "grad_norm": 0.7966394424438477, + "learning_rate": 6.08e-06, + "num_tokens": 951487.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.395, + "step": 1395 + }, + { + "loss": 0.0525, + "grad_norm": 0.9916096329689026, + "learning_rate": 6.07e-06, + "num_tokens": 952090.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.396, + "step": 1396 + }, + { + "loss": 0.0348, + "grad_norm": 0.8064159154891968, + "learning_rate": 6.0600000000000004e-06, + "num_tokens": 952693.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.397, + "step": 1397 + }, + { + "loss": 0.0476, + "grad_norm": 0.7438748478889465, + "learning_rate": 6.0500000000000005e-06, + "num_tokens": 953296.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.3980000000000001, + "step": 1398 + }, + { + "loss": 0.0481, + "grad_norm": 0.7596222162246704, + "learning_rate": 6.040000000000001e-06, + "num_tokens": 954320.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.399, + "step": 1399 + }, + { + "loss": 0.0398, + "grad_norm": 0.770300567150116, + "learning_rate": 6.030000000000001e-06, + "num_tokens": 955344.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.4, + "step": 1400 + }, + { + "loss": 0.0481, + "grad_norm": 0.8269065022468567, + "learning_rate": 6.02e-06, + "num_tokens": 955947.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.401, + "step": 1401 + }, + { + "loss": 0.049, + "grad_norm": 0.8216456770896912, + "learning_rate": 6.01e-06, + "num_tokens": 956550.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.4020000000000001, + "step": 1402 + }, + { + "loss": 0.0657, + "grad_norm": 1.7622767686843872, + "learning_rate": 6e-06, + "num_tokens": 957153.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.403, + "step": 1403 + }, + { + "loss": 0.0535, + "grad_norm": 0.9183257222175598, + "learning_rate": 5.99e-06, + "num_tokens": 957756.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.404, + "step": 1404 + }, + { + "loss": 0.0386, + "grad_norm": 0.7511618137359619, + "learning_rate": 5.98e-06, + "num_tokens": 958780.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.405, + "step": 1405 + }, + { + "loss": 0.0634, + "grad_norm": 0.8935681581497192, + "learning_rate": 5.9700000000000004e-06, + "num_tokens": 959804.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.4060000000000001, + "step": 1406 + }, + { + "loss": 0.0589, + "grad_norm": 1.1542671918869019, + "learning_rate": 5.9600000000000005e-06, + "num_tokens": 960407.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.407, + "step": 1407 + }, + { + "loss": 0.0553, + "grad_norm": 0.9951035380363464, + "learning_rate": 5.950000000000001e-06, + "num_tokens": 961010.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.408, + "step": 1408 + }, + { + "loss": 0.0674, + "grad_norm": 1.0712668895721436, + "learning_rate": 5.94e-06, + "num_tokens": 962034.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.409, + "step": 1409 + }, + { + "loss": 0.0098, + "grad_norm": 1.5661463737487793, + "learning_rate": 5.93e-06, + "num_tokens": 962216.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.41, + "step": 1410 + }, + { + "loss": 0.0478, + "grad_norm": 0.8384937644004822, + "learning_rate": 5.92e-06, + "num_tokens": 963240.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.411, + "step": 1411 + }, + { + "loss": 0.0528, + "grad_norm": 1.0182603597640991, + "learning_rate": 5.91e-06, + "num_tokens": 964264.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.412, + "step": 1412 + }, + { + "loss": 0.0097, + "grad_norm": 1.5686061382293701, + "learning_rate": 5.9e-06, + "num_tokens": 964446.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.413, + "step": 1413 + }, + { + "loss": 0.0346, + "grad_norm": 0.8263946771621704, + "learning_rate": 5.89e-06, + "num_tokens": 965049.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.414, + "step": 1414 + }, + { + "loss": 0.0476, + "grad_norm": 0.9938256144523621, + "learning_rate": 5.8800000000000005e-06, + "num_tokens": 966073.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.415, + "step": 1415 + }, + { + "loss": 0.0451, + "grad_norm": 0.6707625985145569, + "learning_rate": 5.8700000000000005e-06, + "num_tokens": 967097.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.416, + "step": 1416 + }, + { + "loss": 0.0379, + "grad_norm": 0.843828558921814, + "learning_rate": 5.86e-06, + "num_tokens": 967700.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.417, + "step": 1417 + }, + { + "loss": 0.0428, + "grad_norm": 0.6218018531799316, + "learning_rate": 5.85e-06, + "num_tokens": 968724.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.418, + "step": 1418 + }, + { + "loss": 0.0085, + "grad_norm": 1.4659920930862427, + "learning_rate": 5.84e-06, + "num_tokens": 968906.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.419, + "step": 1419 + }, + { + "loss": 0.0448, + "grad_norm": 0.6442410945892334, + "learning_rate": 5.83e-06, + "num_tokens": 969930.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.42, + "step": 1420 + }, + { + "loss": 0.0319, + "grad_norm": 0.7817755937576294, + "learning_rate": 5.82e-06, + "num_tokens": 970533.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.421, + "step": 1421 + }, + { + "loss": 0.0509, + "grad_norm": 0.7503489851951599, + "learning_rate": 5.81e-06, + "num_tokens": 971557.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.422, + "step": 1422 + }, + { + "loss": 0.0551, + "grad_norm": 0.8380895256996155, + "learning_rate": 5.8e-06, + "num_tokens": 972581.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.423, + "step": 1423 + }, + { + "loss": 0.0639, + "grad_norm": 0.8143321871757507, + "learning_rate": 5.7900000000000005e-06, + "num_tokens": 973605.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.424, + "step": 1424 + }, + { + "loss": 0.0474, + "grad_norm": 0.8417466282844543, + "learning_rate": 5.78e-06, + "num_tokens": 974208.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.425, + "step": 1425 + }, + { + "loss": 0.0559, + "grad_norm": 0.8972397446632385, + "learning_rate": 5.77e-06, + "num_tokens": 974811.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.426, + "step": 1426 + }, + { + "loss": 0.0377, + "grad_norm": 0.7338786125183105, + "learning_rate": 5.76e-06, + "num_tokens": 975835.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.427, + "step": 1427 + }, + { + "loss": 0.0554, + "grad_norm": 0.9697425961494446, + "learning_rate": 5.75e-06, + "num_tokens": 976859.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.428, + "step": 1428 + }, + { + "loss": 0.0593, + "grad_norm": 1.1090219020843506, + "learning_rate": 5.74e-06, + "num_tokens": 977883.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.429, + "step": 1429 + }, + { + "loss": 0.0562, + "grad_norm": 0.8675426840782166, + "learning_rate": 5.73e-06, + "num_tokens": 978907.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.43, + "step": 1430 + }, + { + "loss": 0.0511, + "grad_norm": 0.9766101837158203, + "learning_rate": 5.72e-06, + "num_tokens": 979510.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.431, + "step": 1431 + }, + { + "loss": 0.0376, + "grad_norm": 0.8910675048828125, + "learning_rate": 5.71e-06, + "num_tokens": 980113.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.432, + "step": 1432 + }, + { + "loss": 0.0481, + "grad_norm": 0.7779074907302856, + "learning_rate": 5.7e-06, + "num_tokens": 980716.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.433, + "step": 1433 + }, + { + "loss": 0.01, + "grad_norm": 1.5922235250473022, + "learning_rate": 5.69e-06, + "num_tokens": 980898.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.434, + "step": 1434 + }, + { + "loss": 0.052, + "grad_norm": 1.0975040197372437, + "learning_rate": 5.68e-06, + "num_tokens": 981501.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.435, + "step": 1435 + }, + { + "loss": 0.0314, + "grad_norm": 0.6844534873962402, + "learning_rate": 5.67e-06, + "num_tokens": 982104.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.436, + "step": 1436 + }, + { + "loss": 0.0105, + "grad_norm": 1.6451897621154785, + "learning_rate": 5.66e-06, + "num_tokens": 982286.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.437, + "step": 1437 + }, + { + "loss": 0.0989, + "grad_norm": 1.1932672262191772, + "learning_rate": 5.65e-06, + "num_tokens": 983310.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.438, + "step": 1438 + }, + { + "loss": 0.0553, + "grad_norm": 0.8934344053268433, + "learning_rate": 5.64e-06, + "num_tokens": 984334.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.439, + "step": 1439 + }, + { + "loss": 0.0469, + "grad_norm": 0.9624803066253662, + "learning_rate": 5.63e-06, + "num_tokens": 984937.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.44, + "step": 1440 + }, + { + "loss": 0.0519, + "grad_norm": 0.8022207617759705, + "learning_rate": 5.620000000000001e-06, + "num_tokens": 985961.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.441, + "step": 1441 + }, + { + "loss": 0.0474, + "grad_norm": 0.9001027941703796, + "learning_rate": 5.610000000000001e-06, + "num_tokens": 986564.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.442, + "step": 1442 + }, + { + "loss": 0.0071, + "grad_norm": 1.2037103176116943, + "learning_rate": 5.600000000000001e-06, + "num_tokens": 986746.0, + "mean_token_accuracy": 1.0, + "epoch": 1.443, + "step": 1443 + }, + { + "loss": 0.0487, + "grad_norm": 0.9536978006362915, + "learning_rate": 5.590000000000001e-06, + "num_tokens": 987349.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.444, + "step": 1444 + }, + { + "loss": 0.0469, + "grad_norm": 0.7186264395713806, + "learning_rate": 5.580000000000001e-06, + "num_tokens": 988373.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.445, + "step": 1445 + }, + { + "loss": 0.1263, + "grad_norm": 2.343201160430908, + "learning_rate": 5.570000000000001e-06, + "num_tokens": 988976.0, + "mean_token_accuracy": 0.9633943438529968, + "epoch": 1.446, + "step": 1446 + }, + { + "loss": 0.0488, + "grad_norm": 0.8710882067680359, + "learning_rate": 5.560000000000001e-06, + "num_tokens": 990000.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.447, + "step": 1447 + }, + { + "loss": 0.0644, + "grad_norm": 1.3034676313400269, + "learning_rate": 5.550000000000001e-06, + "num_tokens": 991024.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.448, + "step": 1448 + }, + { + "loss": 0.0343, + "grad_norm": 0.8432696461677551, + "learning_rate": 5.540000000000001e-06, + "num_tokens": 991627.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.449, + "step": 1449 + }, + { + "loss": 0.006, + "grad_norm": 0.9940508008003235, + "learning_rate": 5.530000000000001e-06, + "num_tokens": 991809.0, + "mean_token_accuracy": 1.0, + "epoch": 1.45, + "step": 1450 + }, + { + "loss": 0.0501, + "grad_norm": 0.7937811613082886, + "learning_rate": 5.5200000000000005e-06, + "num_tokens": 992833.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.451, + "step": 1451 + }, + { + "loss": 0.057, + "grad_norm": 1.3005925416946411, + "learning_rate": 5.510000000000001e-06, + "num_tokens": 993436.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.452, + "step": 1452 + }, + { + "loss": 0.0056, + "grad_norm": 0.953944742679596, + "learning_rate": 5.500000000000001e-06, + "num_tokens": 993618.0, + "mean_token_accuracy": 1.0, + "epoch": 1.453, + "step": 1453 + }, + { + "loss": 0.0339, + "grad_norm": 0.7726427912712097, + "learning_rate": 5.490000000000001e-06, + "num_tokens": 994221.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.454, + "step": 1454 + }, + { + "loss": 0.0435, + "grad_norm": 0.8961969017982483, + "learning_rate": 5.480000000000001e-06, + "num_tokens": 995245.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.455, + "step": 1455 + }, + { + "loss": 0.0577, + "grad_norm": 0.8478931188583374, + "learning_rate": 5.470000000000001e-06, + "num_tokens": 996269.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.456, + "step": 1456 + }, + { + "loss": 0.0329, + "grad_norm": 0.8090602159500122, + "learning_rate": 5.460000000000001e-06, + "num_tokens": 996872.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.457, + "step": 1457 + }, + { + "loss": 0.0608, + "grad_norm": 0.9001142382621765, + "learning_rate": 5.450000000000001e-06, + "num_tokens": 997896.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.458, + "step": 1458 + }, + { + "loss": 0.0538, + "grad_norm": 0.772366464138031, + "learning_rate": 5.4400000000000004e-06, + "num_tokens": 998920.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.459, + "step": 1459 + }, + { + "loss": 0.0517, + "grad_norm": 1.0373460054397583, + "learning_rate": 5.4300000000000005e-06, + "num_tokens": 999523.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.46, + "step": 1460 + }, + { + "loss": 0.0378, + "grad_norm": 0.962916374206543, + "learning_rate": 5.420000000000001e-06, + "num_tokens": 1000126.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.461, + "step": 1461 + }, + { + "loss": 0.0515, + "grad_norm": 0.7162904739379883, + "learning_rate": 5.410000000000001e-06, + "num_tokens": 1001150.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.462, + "step": 1462 + }, + { + "loss": 0.0595, + "grad_norm": 0.8994327187538147, + "learning_rate": 5.400000000000001e-06, + "num_tokens": 1002174.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.463, + "step": 1463 + }, + { + "loss": 0.0505, + "grad_norm": 1.0326029062271118, + "learning_rate": 5.390000000000001e-06, + "num_tokens": 1002777.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.464, + "step": 1464 + }, + { + "loss": 0.0574, + "grad_norm": 0.9661214351654053, + "learning_rate": 5.380000000000001e-06, + "num_tokens": 1003801.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.465, + "step": 1465 + }, + { + "loss": 0.049, + "grad_norm": 0.9666001200675964, + "learning_rate": 5.370000000000001e-06, + "num_tokens": 1004404.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.466, + "step": 1466 + }, + { + "loss": 0.0085, + "grad_norm": 1.3241703510284424, + "learning_rate": 5.36e-06, + "num_tokens": 1004586.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.467, + "step": 1467 + }, + { + "loss": 0.046, + "grad_norm": 0.6046337485313416, + "learning_rate": 5.3500000000000004e-06, + "num_tokens": 1005610.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.468, + "step": 1468 + }, + { + "loss": 0.0593, + "grad_norm": 0.6918057799339294, + "learning_rate": 5.3400000000000005e-06, + "num_tokens": 1006634.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.4689999999999999, + "step": 1469 + }, + { + "loss": 0.0451, + "grad_norm": 0.6940487027168274, + "learning_rate": 5.330000000000001e-06, + "num_tokens": 1007658.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.47, + "step": 1470 + }, + { + "loss": 0.0478, + "grad_norm": 0.9059286117553711, + "learning_rate": 5.320000000000001e-06, + "num_tokens": 1008261.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.471, + "step": 1471 + }, + { + "loss": 0.0618, + "grad_norm": 1.034736156463623, + "learning_rate": 5.310000000000001e-06, + "num_tokens": 1009285.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.472, + "step": 1472 + }, + { + "loss": 0.0454, + "grad_norm": 0.8436343669891357, + "learning_rate": 5.300000000000001e-06, + "num_tokens": 1009888.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.4729999999999999, + "step": 1473 + }, + { + "loss": 0.0484, + "grad_norm": 0.7013604044914246, + "learning_rate": 5.290000000000001e-06, + "num_tokens": 1010491.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.474, + "step": 1474 + }, + { + "loss": 0.0097, + "grad_norm": 1.5493104457855225, + "learning_rate": 5.28e-06, + "num_tokens": 1010673.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.475, + "step": 1475 + }, + { + "loss": 0.0474, + "grad_norm": 0.7735861539840698, + "learning_rate": 5.27e-06, + "num_tokens": 1011697.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.476, + "step": 1476 + }, + { + "loss": 0.0519, + "grad_norm": 0.8996990323066711, + "learning_rate": 5.2600000000000005e-06, + "num_tokens": 1012300.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.4769999999999999, + "step": 1477 + }, + { + "loss": 0.0471, + "grad_norm": 0.9033766984939575, + "learning_rate": 5.2500000000000006e-06, + "num_tokens": 1013324.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.478, + "step": 1478 + }, + { + "loss": 0.0098, + "grad_norm": 1.5441380739212036, + "learning_rate": 5.240000000000001e-06, + "num_tokens": 1013506.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.479, + "step": 1479 + }, + { + "loss": 0.0321, + "grad_norm": 0.7326072454452515, + "learning_rate": 5.230000000000001e-06, + "num_tokens": 1014109.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.48, + "step": 1480 + }, + { + "loss": 0.05, + "grad_norm": 0.7916252017021179, + "learning_rate": 5.220000000000001e-06, + "num_tokens": 1015133.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4809999999999999, + "step": 1481 + }, + { + "loss": 0.0469, + "grad_norm": 0.6595597863197327, + "learning_rate": 5.210000000000001e-06, + "num_tokens": 1016157.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.482, + "step": 1482 + }, + { + "loss": 0.0087, + "grad_norm": 1.4249048233032227, + "learning_rate": 5.2e-06, + "num_tokens": 1016339.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.483, + "step": 1483 + }, + { + "loss": 0.0508, + "grad_norm": 0.8671485781669617, + "learning_rate": 5.19e-06, + "num_tokens": 1017363.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.484, + "step": 1484 + }, + { + "loss": 0.0079, + "grad_norm": 1.3106517791748047, + "learning_rate": 5.18e-06, + "num_tokens": 1017545.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4849999999999999, + "step": 1485 + }, + { + "loss": 0.0071, + "grad_norm": 1.222119927406311, + "learning_rate": 5.1700000000000005e-06, + "num_tokens": 1017727.0, + "mean_token_accuracy": 1.0, + "epoch": 1.486, + "step": 1486 + }, + { + "loss": 0.0672, + "grad_norm": 1.2891416549682617, + "learning_rate": 5.1600000000000006e-06, + "num_tokens": 1018751.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.487, + "step": 1487 + }, + { + "loss": 0.0366, + "grad_norm": 0.7987739443778992, + "learning_rate": 5.150000000000001e-06, + "num_tokens": 1019775.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.488, + "step": 1488 + }, + { + "loss": 0.0052, + "grad_norm": 0.914754331111908, + "learning_rate": 5.140000000000001e-06, + "num_tokens": 1019957.0, + "mean_token_accuracy": 1.0, + "epoch": 1.4889999999999999, + "step": 1489 + }, + { + "loss": 0.0616, + "grad_norm": 1.0975897312164307, + "learning_rate": 5.130000000000001e-06, + "num_tokens": 1020981.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.49, + "step": 1490 + }, + { + "loss": 0.004, + "grad_norm": 0.7056474089622498, + "learning_rate": 5.12e-06, + "num_tokens": 1021163.0, + "mean_token_accuracy": 1.0, + "epoch": 1.491, + "step": 1491 + }, + { + "loss": 0.0436, + "grad_norm": 1.1120914220809937, + "learning_rate": 5.11e-06, + "num_tokens": 1021766.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.492, + "step": 1492 + }, + { + "loss": 0.0336, + "grad_norm": 0.6931697726249695, + "learning_rate": 5.1e-06, + "num_tokens": 1022369.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.4929999999999999, + "step": 1493 + }, + { + "loss": 0.0378, + "grad_norm": 0.9726889729499817, + "learning_rate": 5.09e-06, + "num_tokens": 1022972.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.494, + "step": 1494 + }, + { + "loss": 0.0366, + "grad_norm": 0.8213800191879272, + "learning_rate": 5.0800000000000005e-06, + "num_tokens": 1023575.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.495, + "step": 1495 + }, + { + "loss": 0.0031, + "grad_norm": 0.5312236547470093, + "learning_rate": 5.070000000000001e-06, + "num_tokens": 1023757.0, + "mean_token_accuracy": 1.0, + "epoch": 1.496, + "step": 1496 + }, + { + "loss": 0.0549, + "grad_norm": 1.0347145795822144, + "learning_rate": 5.060000000000001e-06, + "num_tokens": 1024781.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.4969999999999999, + "step": 1497 + }, + { + "loss": 0.0383, + "grad_norm": 0.7086313962936401, + "learning_rate": 5.050000000000001e-06, + "num_tokens": 1025805.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.498, + "step": 1498 + }, + { + "loss": 0.0028, + "grad_norm": 0.4698486626148224, + "learning_rate": 5.04e-06, + "num_tokens": 1025987.0, + "mean_token_accuracy": 1.0, + "epoch": 1.499, + "step": 1499 + }, + { + "loss": 0.0336, + "grad_norm": 1.0022740364074707, + "learning_rate": 5.03e-06, + "num_tokens": 1026590.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5, + "step": 1500 + }, + { + "loss": 0.0485, + "grad_norm": 1.0019136667251587, + "learning_rate": 5.02e-06, + "num_tokens": 1027193.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.501, + "step": 1501 + }, + { + "loss": 0.0646, + "grad_norm": 1.0677893161773682, + "learning_rate": 5.01e-06, + "num_tokens": 1028217.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.502, + "step": 1502 + }, + { + "loss": 0.0518, + "grad_norm": 1.0055443048477173, + "learning_rate": 5e-06, + "num_tokens": 1028820.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.5030000000000001, + "step": 1503 + }, + { + "loss": 0.0579, + "grad_norm": 0.7834446430206299, + "learning_rate": 4.9900000000000005e-06, + "num_tokens": 1029844.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.504, + "step": 1504 + }, + { + "loss": 0.0502, + "grad_norm": 0.8990997076034546, + "learning_rate": 4.980000000000001e-06, + "num_tokens": 1030447.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.505, + "step": 1505 + }, + { + "loss": 0.0489, + "grad_norm": 0.812285840511322, + "learning_rate": 4.970000000000001e-06, + "num_tokens": 1031050.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.506, + "step": 1506 + }, + { + "loss": 0.0035, + "grad_norm": 0.6116827726364136, + "learning_rate": 4.960000000000001e-06, + "num_tokens": 1031232.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5070000000000001, + "step": 1507 + }, + { + "loss": 0.0039, + "grad_norm": 0.6817529201507568, + "learning_rate": 4.95e-06, + "num_tokens": 1031414.0, + "mean_token_accuracy": 1.0, + "epoch": 1.508, + "step": 1508 + }, + { + "loss": 0.0545, + "grad_norm": 0.8566991090774536, + "learning_rate": 4.94e-06, + "num_tokens": 1032438.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.509, + "step": 1509 + }, + { + "loss": 0.0421, + "grad_norm": 0.7650224566459656, + "learning_rate": 4.93e-06, + "num_tokens": 1033462.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.51, + "step": 1510 + }, + { + "loss": 0.0502, + "grad_norm": 1.4276961088180542, + "learning_rate": 4.92e-06, + "num_tokens": 1034065.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5110000000000001, + "step": 1511 + }, + { + "loss": 0.0595, + "grad_norm": 0.9101549983024597, + "learning_rate": 4.9100000000000004e-06, + "num_tokens": 1035089.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.512, + "step": 1512 + }, + { + "loss": 0.0411, + "grad_norm": 0.8246486783027649, + "learning_rate": 4.9000000000000005e-06, + "num_tokens": 1036113.0, + "mean_token_accuracy": 0.9833659529685974, + "epoch": 1.513, + "step": 1513 + }, + { + "loss": 0.0567, + "grad_norm": 0.6719825863838196, + "learning_rate": 4.890000000000001e-06, + "num_tokens": 1037137.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.514, + "step": 1514 + }, + { + "loss": 0.0651, + "grad_norm": 0.9816451072692871, + "learning_rate": 4.880000000000001e-06, + "num_tokens": 1038161.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.5150000000000001, + "step": 1515 + }, + { + "loss": 0.043, + "grad_norm": 0.5606999397277832, + "learning_rate": 4.87e-06, + "num_tokens": 1039185.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.516, + "step": 1516 + }, + { + "loss": 0.0587, + "grad_norm": 0.8615964651107788, + "learning_rate": 4.86e-06, + "num_tokens": 1040209.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.517, + "step": 1517 + }, + { + "loss": 0.0066, + "grad_norm": 1.1458766460418701, + "learning_rate": 4.85e-06, + "num_tokens": 1040391.0, + "mean_token_accuracy": 1.0, + "epoch": 1.518, + "step": 1518 + }, + { + "loss": 0.0317, + "grad_norm": 0.603073239326477, + "learning_rate": 4.84e-06, + "num_tokens": 1040994.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.5190000000000001, + "step": 1519 + }, + { + "loss": 0.0478, + "grad_norm": 0.7289522886276245, + "learning_rate": 4.83e-06, + "num_tokens": 1042018.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.52, + "step": 1520 + }, + { + "loss": 0.0575, + "grad_norm": 1.0849231481552124, + "learning_rate": 4.8200000000000004e-06, + "num_tokens": 1042621.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.521, + "step": 1521 + }, + { + "loss": 0.0455, + "grad_norm": 0.7681816220283508, + "learning_rate": 4.8100000000000005e-06, + "num_tokens": 1043224.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.522, + "step": 1522 + }, + { + "loss": 0.0334, + "grad_norm": 0.7258145213127136, + "learning_rate": 4.800000000000001e-06, + "num_tokens": 1043827.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5230000000000001, + "step": 1523 + }, + { + "loss": 0.0558, + "grad_norm": 0.8517635464668274, + "learning_rate": 4.79e-06, + "num_tokens": 1044851.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.524, + "step": 1524 + }, + { + "loss": 0.0449, + "grad_norm": 0.9045063257217407, + "learning_rate": 4.78e-06, + "num_tokens": 1045454.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.525, + "step": 1525 + }, + { + "loss": 0.0333, + "grad_norm": 0.8299849033355713, + "learning_rate": 4.77e-06, + "num_tokens": 1046057.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.526, + "step": 1526 + }, + { + "loss": 0.0539, + "grad_norm": 0.7558150291442871, + "learning_rate": 4.76e-06, + "num_tokens": 1047081.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.5270000000000001, + "step": 1527 + }, + { + "loss": 0.0567, + "grad_norm": 1.201917052268982, + "learning_rate": 4.75e-06, + "num_tokens": 1047684.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.528, + "step": 1528 + }, + { + "loss": 0.0539, + "grad_norm": 1.0532753467559814, + "learning_rate": 4.74e-06, + "num_tokens": 1048287.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.529, + "step": 1529 + }, + { + "loss": 0.0339, + "grad_norm": 0.8715020418167114, + "learning_rate": 4.7300000000000005e-06, + "num_tokens": 1048890.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.53, + "step": 1530 + }, + { + "loss": 0.0552, + "grad_norm": 1.2127397060394287, + "learning_rate": 4.7200000000000005e-06, + "num_tokens": 1049493.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5310000000000001, + "step": 1531 + }, + { + "loss": 0.0466, + "grad_norm": 0.6669203042984009, + "learning_rate": 4.71e-06, + "num_tokens": 1050517.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.532, + "step": 1532 + }, + { + "loss": 0.0523, + "grad_norm": 0.8616625070571899, + "learning_rate": 4.7e-06, + "num_tokens": 1051541.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.533, + "step": 1533 + }, + { + "loss": 0.0635, + "grad_norm": 1.3307801485061646, + "learning_rate": 4.69e-06, + "num_tokens": 1052144.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.534, + "step": 1534 + }, + { + "loss": 0.0447, + "grad_norm": 0.8427996039390564, + "learning_rate": 4.680000000000001e-06, + "num_tokens": 1052747.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5350000000000001, + "step": 1535 + }, + { + "loss": 0.057, + "grad_norm": 1.3174206018447876, + "learning_rate": 4.670000000000001e-06, + "num_tokens": 1053350.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.536, + "step": 1536 + }, + { + "loss": 0.0523, + "grad_norm": 1.1958731412887573, + "learning_rate": 4.66e-06, + "num_tokens": 1053953.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.537, + "step": 1537 + }, + { + "loss": 0.0562, + "grad_norm": 1.1242337226867676, + "learning_rate": 4.65e-06, + "num_tokens": 1054977.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.538, + "step": 1538 + }, + { + "loss": 0.0342, + "grad_norm": 0.7817521691322327, + "learning_rate": 4.6400000000000005e-06, + "num_tokens": 1055580.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.5390000000000001, + "step": 1539 + }, + { + "loss": 0.0516, + "grad_norm": 0.8116522431373596, + "learning_rate": 4.6300000000000006e-06, + "num_tokens": 1056183.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.54, + "step": 1540 + }, + { + "loss": 0.0551, + "grad_norm": 0.7639745473861694, + "learning_rate": 4.620000000000001e-06, + "num_tokens": 1057207.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.541, + "step": 1541 + }, + { + "loss": 0.0536, + "grad_norm": 0.8198519945144653, + "learning_rate": 4.610000000000001e-06, + "num_tokens": 1058231.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.542, + "step": 1542 + }, + { + "loss": 0.0344, + "grad_norm": 0.7266889214515686, + "learning_rate": 4.600000000000001e-06, + "num_tokens": 1058834.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5430000000000001, + "step": 1543 + }, + { + "loss": 0.0555, + "grad_norm": 1.113586187362671, + "learning_rate": 4.590000000000001e-06, + "num_tokens": 1059437.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.544, + "step": 1544 + }, + { + "loss": 0.012, + "grad_norm": 1.719358205795288, + "learning_rate": 4.58e-06, + "num_tokens": 1059619.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.545, + "step": 1545 + }, + { + "loss": 0.0321, + "grad_norm": 0.7295169234275818, + "learning_rate": 4.57e-06, + "num_tokens": 1060222.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.546, + "step": 1546 + }, + { + "loss": 0.0516, + "grad_norm": 0.7697953581809998, + "learning_rate": 4.56e-06, + "num_tokens": 1061246.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5470000000000002, + "step": 1547 + }, + { + "loss": 0.0106, + "grad_norm": 1.5413947105407715, + "learning_rate": 4.5500000000000005e-06, + "num_tokens": 1061428.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.548, + "step": 1548 + }, + { + "loss": 0.0588, + "grad_norm": 0.8341297507286072, + "learning_rate": 4.540000000000001e-06, + "num_tokens": 1062452.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.549, + "step": 1549 + }, + { + "loss": 0.0101, + "grad_norm": 1.516141653060913, + "learning_rate": 4.530000000000001e-06, + "num_tokens": 1062634.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.55, + "step": 1550 + }, + { + "loss": 0.0366, + "grad_norm": 0.8384003639221191, + "learning_rate": 4.520000000000001e-06, + "num_tokens": 1063237.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5510000000000002, + "step": 1551 + }, + { + "loss": 0.0506, + "grad_norm": 0.8416287899017334, + "learning_rate": 4.510000000000001e-06, + "num_tokens": 1064261.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.552, + "step": 1552 + }, + { + "loss": 0.0538, + "grad_norm": 1.3951233625411987, + "learning_rate": 4.5e-06, + "num_tokens": 1064864.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.553, + "step": 1553 + }, + { + "loss": 0.0565, + "grad_norm": 0.8929548859596252, + "learning_rate": 4.49e-06, + "num_tokens": 1065888.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.554, + "step": 1554 + }, + { + "loss": 0.0466, + "grad_norm": 0.8937817215919495, + "learning_rate": 4.48e-06, + "num_tokens": 1066491.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.5550000000000002, + "step": 1555 + }, + { + "loss": 0.0609, + "grad_norm": 0.8740326166152954, + "learning_rate": 4.47e-06, + "num_tokens": 1067515.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.556, + "step": 1556 + }, + { + "loss": 0.0352, + "grad_norm": 0.8204190135002136, + "learning_rate": 4.4600000000000005e-06, + "num_tokens": 1068118.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.557, + "step": 1557 + }, + { + "loss": 0.0447, + "grad_norm": 0.7500142455101013, + "learning_rate": 4.450000000000001e-06, + "num_tokens": 1068721.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.558, + "step": 1558 + }, + { + "loss": 0.0503, + "grad_norm": 0.7551432847976685, + "learning_rate": 4.440000000000001e-06, + "num_tokens": 1069745.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.5590000000000002, + "step": 1559 + }, + { + "loss": 0.0352, + "grad_norm": 0.7508884072303772, + "learning_rate": 4.430000000000001e-06, + "num_tokens": 1070348.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.56, + "step": 1560 + }, + { + "loss": 0.0521, + "grad_norm": 0.9934411644935608, + "learning_rate": 4.42e-06, + "num_tokens": 1070951.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.561, + "step": 1561 + }, + { + "loss": 0.0459, + "grad_norm": 0.6874534487724304, + "learning_rate": 4.41e-06, + "num_tokens": 1071975.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.562, + "step": 1562 + }, + { + "loss": 0.0501, + "grad_norm": 0.7553894519805908, + "learning_rate": 4.4e-06, + "num_tokens": 1072999.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.563, + "step": 1563 + }, + { + "loss": 0.0073, + "grad_norm": 1.179804801940918, + "learning_rate": 4.39e-06, + "num_tokens": 1073181.0, + "mean_token_accuracy": 1.0, + "epoch": 1.564, + "step": 1564 + }, + { + "loss": 0.0487, + "grad_norm": 0.7780734896659851, + "learning_rate": 4.38e-06, + "num_tokens": 1074205.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.565, + "step": 1565 + }, + { + "loss": 0.0071, + "grad_norm": 1.1694072484970093, + "learning_rate": 4.3700000000000005e-06, + "num_tokens": 1074387.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5659999999999998, + "step": 1566 + }, + { + "loss": 0.0516, + "grad_norm": 1.098961353302002, + "learning_rate": 4.360000000000001e-06, + "num_tokens": 1074990.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.567, + "step": 1567 + }, + { + "loss": 0.0456, + "grad_norm": 0.7084697484970093, + "learning_rate": 4.350000000000001e-06, + "num_tokens": 1076014.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.568, + "step": 1568 + }, + { + "loss": 0.0572, + "grad_norm": 0.8608739376068115, + "learning_rate": 4.34e-06, + "num_tokens": 1077038.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.569, + "step": 1569 + }, + { + "loss": 0.0536, + "grad_norm": 1.1235098838806152, + "learning_rate": 4.33e-06, + "num_tokens": 1077641.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.5699999999999998, + "step": 1570 + }, + { + "loss": 0.0061, + "grad_norm": 1.022011399269104, + "learning_rate": 4.32e-06, + "num_tokens": 1077823.0, + "mean_token_accuracy": 1.0, + "epoch": 1.571, + "step": 1571 + }, + { + "loss": 0.0594, + "grad_norm": 0.8419452905654907, + "learning_rate": 4.31e-06, + "num_tokens": 1078847.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.572, + "step": 1572 + }, + { + "loss": 0.0376, + "grad_norm": 0.7862662672996521, + "learning_rate": 4.3e-06, + "num_tokens": 1079871.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.573, + "step": 1573 + }, + { + "loss": 0.0397, + "grad_norm": 0.7846319079399109, + "learning_rate": 4.2900000000000004e-06, + "num_tokens": 1080895.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.5739999999999998, + "step": 1574 + }, + { + "loss": 0.0061, + "grad_norm": 1.02032470703125, + "learning_rate": 4.2800000000000005e-06, + "num_tokens": 1081077.0, + "mean_token_accuracy": 1.0, + "epoch": 1.575, + "step": 1575 + }, + { + "loss": 0.0358, + "grad_norm": 0.8401283621788025, + "learning_rate": 4.270000000000001e-06, + "num_tokens": 1081680.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.576, + "step": 1576 + }, + { + "loss": 0.0423, + "grad_norm": 0.9667369723320007, + "learning_rate": 4.26e-06, + "num_tokens": 1082283.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.577, + "step": 1577 + }, + { + "loss": 0.0427, + "grad_norm": 0.9331235289573669, + "learning_rate": 4.25e-06, + "num_tokens": 1083307.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.5779999999999998, + "step": 1578 + }, + { + "loss": 0.0341, + "grad_norm": 0.7807062268257141, + "learning_rate": 4.24e-06, + "num_tokens": 1083910.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.579, + "step": 1579 + }, + { + "loss": 0.0491, + "grad_norm": 0.861403226852417, + "learning_rate": 4.23e-06, + "num_tokens": 1084513.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.58, + "step": 1580 + }, + { + "loss": 0.0581, + "grad_norm": 1.2565624713897705, + "learning_rate": 4.22e-06, + "num_tokens": 1085537.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.581, + "step": 1581 + }, + { + "loss": 0.0927, + "grad_norm": 1.466109275817871, + "learning_rate": 4.21e-06, + "num_tokens": 1086561.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.5819999999999999, + "step": 1582 + }, + { + "loss": 0.0519, + "grad_norm": 1.1252888441085815, + "learning_rate": 4.2000000000000004e-06, + "num_tokens": 1087585.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.583, + "step": 1583 + }, + { + "loss": 0.0534, + "grad_norm": 1.0422850847244263, + "learning_rate": 4.1900000000000005e-06, + "num_tokens": 1088188.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.584, + "step": 1584 + }, + { + "loss": 0.0059, + "grad_norm": 0.9880717396736145, + "learning_rate": 4.18e-06, + "num_tokens": 1088370.0, + "mean_token_accuracy": 1.0, + "epoch": 1.585, + "step": 1585 + }, + { + "loss": 0.0318, + "grad_norm": 0.8194119930267334, + "learning_rate": 4.17e-06, + "num_tokens": 1088973.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.5859999999999999, + "step": 1586 + }, + { + "loss": 0.035, + "grad_norm": 0.9220993518829346, + "learning_rate": 4.16e-06, + "num_tokens": 1089576.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.587, + "step": 1587 + }, + { + "loss": 0.0058, + "grad_norm": 0.9712525010108948, + "learning_rate": 4.15e-06, + "num_tokens": 1089758.0, + "mean_token_accuracy": 1.0, + "epoch": 1.588, + "step": 1588 + }, + { + "loss": 0.0449, + "grad_norm": 0.7077950835227966, + "learning_rate": 4.14e-06, + "num_tokens": 1090782.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.589, + "step": 1589 + }, + { + "loss": 0.0529, + "grad_norm": 0.994533360004425, + "learning_rate": 4.13e-06, + "num_tokens": 1091385.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.5899999999999999, + "step": 1590 + }, + { + "loss": 0.0495, + "grad_norm": 0.8751122355461121, + "learning_rate": 4.12e-06, + "num_tokens": 1091988.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.591, + "step": 1591 + }, + { + "loss": 0.0476, + "grad_norm": 0.8288613557815552, + "learning_rate": 4.1100000000000005e-06, + "num_tokens": 1093012.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.592, + "step": 1592 + }, + { + "loss": 0.0601, + "grad_norm": 1.0450148582458496, + "learning_rate": 4.1e-06, + "num_tokens": 1094036.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.593, + "step": 1593 + }, + { + "loss": 0.0063, + "grad_norm": 1.0433647632598877, + "learning_rate": 4.09e-06, + "num_tokens": 1094218.0, + "mean_token_accuracy": 1.0, + "epoch": 1.5939999999999999, + "step": 1594 + }, + { + "loss": 0.0575, + "grad_norm": 1.1538662910461426, + "learning_rate": 4.08e-06, + "num_tokens": 1094821.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.595, + "step": 1595 + }, + { + "loss": 0.0362, + "grad_norm": 0.8405407667160034, + "learning_rate": 4.07e-06, + "num_tokens": 1095424.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.596, + "step": 1596 + }, + { + "loss": 0.0611, + "grad_norm": 0.9581584334373474, + "learning_rate": 4.060000000000001e-06, + "num_tokens": 1096448.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.597, + "step": 1597 + }, + { + "loss": 0.0583, + "grad_norm": 1.2413828372955322, + "learning_rate": 4.05e-06, + "num_tokens": 1097051.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.5979999999999999, + "step": 1598 + }, + { + "loss": 0.0515, + "grad_norm": 1.0595495700836182, + "learning_rate": 4.04e-06, + "num_tokens": 1097654.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.599, + "step": 1599 + }, + { + "loss": 0.039, + "grad_norm": 0.931210458278656, + "learning_rate": 4.03e-06, + "num_tokens": 1098257.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.6, + "step": 1600 + }, + { + "loss": 0.0316, + "grad_norm": 0.8093856573104858, + "learning_rate": 4.0200000000000005e-06, + "num_tokens": 1098860.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.601, + "step": 1601 + }, + { + "loss": 0.0312, + "grad_norm": 0.8087005019187927, + "learning_rate": 4.0100000000000006e-06, + "num_tokens": 1099463.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.6019999999999999, + "step": 1602 + }, + { + "loss": 0.0482, + "grad_norm": 0.9823475480079651, + "learning_rate": 4.000000000000001e-06, + "num_tokens": 1100487.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.603, + "step": 1603 + }, + { + "loss": 0.0527, + "grad_norm": 0.8676301836967468, + "learning_rate": 3.990000000000001e-06, + "num_tokens": 1101090.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.604, + "step": 1604 + }, + { + "loss": 0.0596, + "grad_norm": 0.9275328516960144, + "learning_rate": 3.980000000000001e-06, + "num_tokens": 1102114.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.605, + "step": 1605 + }, + { + "loss": 0.0302, + "grad_norm": 0.8553646802902222, + "learning_rate": 3.97e-06, + "num_tokens": 1102717.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.6059999999999999, + "step": 1606 + }, + { + "loss": 0.0064, + "grad_norm": 1.1059050559997559, + "learning_rate": 3.96e-06, + "num_tokens": 1102899.0, + "mean_token_accuracy": 1.0, + "epoch": 1.607, + "step": 1607 + }, + { + "loss": 0.036, + "grad_norm": 0.7443641424179077, + "learning_rate": 3.95e-06, + "num_tokens": 1103502.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.608, + "step": 1608 + }, + { + "loss": 0.0629, + "grad_norm": 0.9508353471755981, + "learning_rate": 3.94e-06, + "num_tokens": 1104526.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.609, + "step": 1609 + }, + { + "loss": 0.0069, + "grad_norm": 1.15656578540802, + "learning_rate": 3.9300000000000005e-06, + "num_tokens": 1104708.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6099999999999999, + "step": 1610 + }, + { + "loss": 0.0496, + "grad_norm": 0.723640501499176, + "learning_rate": 3.920000000000001e-06, + "num_tokens": 1105732.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.611, + "step": 1611 + }, + { + "loss": 0.0625, + "grad_norm": 1.0058673620224, + "learning_rate": 3.910000000000001e-06, + "num_tokens": 1106756.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.612, + "step": 1612 + }, + { + "loss": 0.0483, + "grad_norm": 0.7778430581092834, + "learning_rate": 3.900000000000001e-06, + "num_tokens": 1107780.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.613, + "step": 1613 + }, + { + "loss": 0.0065, + "grad_norm": 1.1014611721038818, + "learning_rate": 3.89e-06, + "num_tokens": 1107962.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6139999999999999, + "step": 1614 + }, + { + "loss": 0.0623, + "grad_norm": 0.8831361532211304, + "learning_rate": 3.88e-06, + "num_tokens": 1108986.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.615, + "step": 1615 + }, + { + "loss": 0.0061, + "grad_norm": 1.0461324453353882, + "learning_rate": 3.87e-06, + "num_tokens": 1109168.0, + "mean_token_accuracy": 1.0, + "epoch": 1.616, + "step": 1616 + }, + { + "loss": 0.0499, + "grad_norm": 1.056103229522705, + "learning_rate": 3.86e-06, + "num_tokens": 1109771.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.617, + "step": 1617 + }, + { + "loss": 0.0452, + "grad_norm": 0.7944758534431458, + "learning_rate": 3.85e-06, + "num_tokens": 1110374.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.6179999999999999, + "step": 1618 + }, + { + "loss": 0.0315, + "grad_norm": 0.8054194450378418, + "learning_rate": 3.8400000000000005e-06, + "num_tokens": 1110977.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.619, + "step": 1619 + }, + { + "loss": 0.0504, + "grad_norm": 0.9761496782302856, + "learning_rate": 3.830000000000001e-06, + "num_tokens": 1111580.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.62, + "step": 1620 + }, + { + "loss": 0.0658, + "grad_norm": 0.9077417254447937, + "learning_rate": 3.820000000000001e-06, + "num_tokens": 1112604.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.621, + "step": 1621 + }, + { + "loss": 0.0477, + "grad_norm": 0.8071428537368774, + "learning_rate": 3.8100000000000004e-06, + "num_tokens": 1113207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.6219999999999999, + "step": 1622 + }, + { + "loss": 0.041, + "grad_norm": 0.7867160439491272, + "learning_rate": 3.8000000000000005e-06, + "num_tokens": 1114231.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.623, + "step": 1623 + }, + { + "loss": 0.0332, + "grad_norm": 0.8921499252319336, + "learning_rate": 3.79e-06, + "num_tokens": 1114834.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.624, + "step": 1624 + }, + { + "loss": 0.051, + "grad_norm": 0.9043579697608948, + "learning_rate": 3.7800000000000002e-06, + "num_tokens": 1115858.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.625, + "step": 1625 + }, + { + "loss": 0.0613, + "grad_norm": 1.0464129447937012, + "learning_rate": 3.7700000000000003e-06, + "num_tokens": 1116882.0, + "mean_token_accuracy": 0.9667319059371948, + "epoch": 1.626, + "step": 1626 + }, + { + "loss": 0.058, + "grad_norm": 1.1696254014968872, + "learning_rate": 3.7600000000000004e-06, + "num_tokens": 1117485.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.627, + "step": 1627 + }, + { + "loss": 0.0549, + "grad_norm": 0.8511863946914673, + "learning_rate": 3.7500000000000005e-06, + "num_tokens": 1118509.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.6280000000000001, + "step": 1628 + }, + { + "loss": 0.0063, + "grad_norm": 1.0807744264602661, + "learning_rate": 3.74e-06, + "num_tokens": 1118691.0, + "mean_token_accuracy": 1.0, + "epoch": 1.629, + "step": 1629 + }, + { + "loss": 0.0509, + "grad_norm": 0.9100387096405029, + "learning_rate": 3.7300000000000003e-06, + "num_tokens": 1119294.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.63, + "step": 1630 + }, + { + "loss": 0.0066, + "grad_norm": 1.1098606586456299, + "learning_rate": 3.7200000000000004e-06, + "num_tokens": 1119476.0, + "mean_token_accuracy": 1.0, + "epoch": 1.631, + "step": 1631 + }, + { + "loss": 0.0459, + "grad_norm": 0.6645187139511108, + "learning_rate": 3.7100000000000005e-06, + "num_tokens": 1120500.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6320000000000001, + "step": 1632 + }, + { + "loss": 0.0494, + "grad_norm": 1.1095669269561768, + "learning_rate": 3.7e-06, + "num_tokens": 1121103.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.633, + "step": 1633 + }, + { + "loss": 0.0471, + "grad_norm": 0.8348158597946167, + "learning_rate": 3.6900000000000002e-06, + "num_tokens": 1121706.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.634, + "step": 1634 + }, + { + "loss": 0.0563, + "grad_norm": 0.8096620440483093, + "learning_rate": 3.6800000000000003e-06, + "num_tokens": 1122730.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.635, + "step": 1635 + }, + { + "loss": 0.0498, + "grad_norm": 0.7935335636138916, + "learning_rate": 3.6700000000000004e-06, + "num_tokens": 1123754.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.6360000000000001, + "step": 1636 + }, + { + "loss": 0.0962, + "grad_norm": 1.131250023841858, + "learning_rate": 3.66e-06, + "num_tokens": 1124778.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.637, + "step": 1637 + }, + { + "loss": 0.0365, + "grad_norm": 0.808918297290802, + "learning_rate": 3.65e-06, + "num_tokens": 1125381.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.638, + "step": 1638 + }, + { + "loss": 0.0063, + "grad_norm": 1.0540261268615723, + "learning_rate": 3.6400000000000003e-06, + "num_tokens": 1125563.0, + "mean_token_accuracy": 1.0, + "epoch": 1.639, + "step": 1639 + }, + { + "loss": 0.0631, + "grad_norm": 0.9925756454467773, + "learning_rate": 3.6300000000000004e-06, + "num_tokens": 1126587.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.6400000000000001, + "step": 1640 + }, + { + "loss": 0.057, + "grad_norm": 0.8026877641677856, + "learning_rate": 3.62e-06, + "num_tokens": 1127611.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.641, + "step": 1641 + }, + { + "loss": 0.0331, + "grad_norm": 0.7825866937637329, + "learning_rate": 3.61e-06, + "num_tokens": 1128214.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.642, + "step": 1642 + }, + { + "loss": 0.0395, + "grad_norm": 0.9599487781524658, + "learning_rate": 3.6000000000000003e-06, + "num_tokens": 1129238.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.643, + "step": 1643 + }, + { + "loss": 0.054, + "grad_norm": 0.8558062314987183, + "learning_rate": 3.5900000000000004e-06, + "num_tokens": 1130262.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.6440000000000001, + "step": 1644 + }, + { + "loss": 0.0073, + "grad_norm": 1.2038366794586182, + "learning_rate": 3.58e-06, + "num_tokens": 1130444.0, + "mean_token_accuracy": 1.0, + "epoch": 1.645, + "step": 1645 + }, + { + "loss": 0.0493, + "grad_norm": 0.989517867565155, + "learning_rate": 3.57e-06, + "num_tokens": 1131468.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.646, + "step": 1646 + }, + { + "loss": 0.0503, + "grad_norm": 0.8166787624359131, + "learning_rate": 3.5600000000000002e-06, + "num_tokens": 1132071.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.647, + "step": 1647 + }, + { + "loss": 0.0067, + "grad_norm": 1.1410889625549316, + "learning_rate": 3.5500000000000003e-06, + "num_tokens": 1132253.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6480000000000001, + "step": 1648 + }, + { + "loss": 0.0621, + "grad_norm": 0.9194291234016418, + "learning_rate": 3.54e-06, + "num_tokens": 1133277.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.649, + "step": 1649 + }, + { + "loss": 0.0507, + "grad_norm": 0.981034517288208, + "learning_rate": 3.53e-06, + "num_tokens": 1133880.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.65, + "step": 1650 + }, + { + "loss": 0.0512, + "grad_norm": 0.7907586097717285, + "learning_rate": 3.52e-06, + "num_tokens": 1134904.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.651, + "step": 1651 + }, + { + "loss": 0.0574, + "grad_norm": 0.8653498291969299, + "learning_rate": 3.5100000000000003e-06, + "num_tokens": 1135928.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.6520000000000001, + "step": 1652 + }, + { + "loss": 0.0509, + "grad_norm": 1.11887788772583, + "learning_rate": 3.5e-06, + "num_tokens": 1136531.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.653, + "step": 1653 + }, + { + "loss": 0.0568, + "grad_norm": 1.312667727470398, + "learning_rate": 3.49e-06, + "num_tokens": 1137134.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.654, + "step": 1654 + }, + { + "loss": 0.0523, + "grad_norm": 1.0086694955825806, + "learning_rate": 3.48e-06, + "num_tokens": 1137737.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.655, + "step": 1655 + }, + { + "loss": 0.0061, + "grad_norm": 1.0424482822418213, + "learning_rate": 3.4700000000000002e-06, + "num_tokens": 1137919.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6560000000000001, + "step": 1656 + }, + { + "loss": 0.0443, + "grad_norm": 0.8345255255699158, + "learning_rate": 3.46e-06, + "num_tokens": 1138522.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.657, + "step": 1657 + }, + { + "loss": 0.0511, + "grad_norm": 0.9122284054756165, + "learning_rate": 3.45e-06, + "num_tokens": 1139546.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.658, + "step": 1658 + }, + { + "loss": 0.0425, + "grad_norm": 0.8380939960479736, + "learning_rate": 3.44e-06, + "num_tokens": 1140149.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.659, + "step": 1659 + }, + { + "loss": 0.0441, + "grad_norm": 0.7784305810928345, + "learning_rate": 3.4300000000000006e-06, + "num_tokens": 1141173.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.6600000000000001, + "step": 1660 + }, + { + "loss": 0.0535, + "grad_norm": 0.9853757619857788, + "learning_rate": 3.4200000000000007e-06, + "num_tokens": 1142197.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.661, + "step": 1661 + }, + { + "loss": 0.0571, + "grad_norm": 0.8722765445709229, + "learning_rate": 3.4100000000000004e-06, + "num_tokens": 1143221.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.662, + "step": 1662 + }, + { + "loss": 0.059, + "grad_norm": 1.0534354448318481, + "learning_rate": 3.4000000000000005e-06, + "num_tokens": 1144245.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.663, + "step": 1663 + }, + { + "loss": 0.0068, + "grad_norm": 1.146028757095337, + "learning_rate": 3.3900000000000006e-06, + "num_tokens": 1144427.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6640000000000001, + "step": 1664 + }, + { + "loss": 0.0548, + "grad_norm": 0.8375920057296753, + "learning_rate": 3.3800000000000007e-06, + "num_tokens": 1145451.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.665, + "step": 1665 + }, + { + "loss": 0.0449, + "grad_norm": 1.0094847679138184, + "learning_rate": 3.3700000000000003e-06, + "num_tokens": 1146054.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.666, + "step": 1666 + }, + { + "loss": 0.045, + "grad_norm": 0.8592609763145447, + "learning_rate": 3.3600000000000004e-06, + "num_tokens": 1146657.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.667, + "step": 1667 + }, + { + "loss": 0.0381, + "grad_norm": 0.7064121961593628, + "learning_rate": 3.3500000000000005e-06, + "num_tokens": 1147681.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.6680000000000001, + "step": 1668 + }, + { + "loss": 0.0403, + "grad_norm": 0.9719851016998291, + "learning_rate": 3.3400000000000006e-06, + "num_tokens": 1148284.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.669, + "step": 1669 + }, + { + "loss": 0.0422, + "grad_norm": 0.8167884945869446, + "learning_rate": 3.3300000000000003e-06, + "num_tokens": 1148887.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.67, + "step": 1670 + }, + { + "loss": 0.054, + "grad_norm": 1.1122660636901855, + "learning_rate": 3.3200000000000004e-06, + "num_tokens": 1149490.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.671, + "step": 1671 + }, + { + "loss": 0.0464, + "grad_norm": 0.8594599366188049, + "learning_rate": 3.3100000000000005e-06, + "num_tokens": 1150514.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.6720000000000002, + "step": 1672 + }, + { + "loss": 0.0071, + "grad_norm": 1.174099326133728, + "learning_rate": 3.3000000000000006e-06, + "num_tokens": 1150696.0, + "mean_token_accuracy": 1.0, + "epoch": 1.673, + "step": 1673 + }, + { + "loss": 0.0389, + "grad_norm": 0.7924457788467407, + "learning_rate": 3.2900000000000003e-06, + "num_tokens": 1151720.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.674, + "step": 1674 + }, + { + "loss": 0.0078, + "grad_norm": 1.306631088256836, + "learning_rate": 3.2800000000000004e-06, + "num_tokens": 1151902.0, + "mean_token_accuracy": 1.0, + "epoch": 1.675, + "step": 1675 + }, + { + "loss": 0.0071, + "grad_norm": 1.1881757974624634, + "learning_rate": 3.2700000000000005e-06, + "num_tokens": 1152084.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6760000000000002, + "step": 1676 + }, + { + "loss": 0.0339, + "grad_norm": 0.8299407362937927, + "learning_rate": 3.2600000000000006e-06, + "num_tokens": 1152687.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.677, + "step": 1677 + }, + { + "loss": 0.0298, + "grad_norm": 0.7375956773757935, + "learning_rate": 3.2500000000000002e-06, + "num_tokens": 1153290.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.678, + "step": 1678 + }, + { + "loss": 0.0055, + "grad_norm": 0.9513365626335144, + "learning_rate": 3.2400000000000003e-06, + "num_tokens": 1153472.0, + "mean_token_accuracy": 1.0, + "epoch": 1.679, + "step": 1679 + }, + { + "loss": 0.0058, + "grad_norm": 0.9881709218025208, + "learning_rate": 3.2300000000000004e-06, + "num_tokens": 1153654.0, + "mean_token_accuracy": 1.0, + "epoch": 1.6800000000000002, + "step": 1680 + }, + { + "loss": 0.0049, + "grad_norm": 0.8430343270301819, + "learning_rate": 3.2200000000000005e-06, + "num_tokens": 1153836.0, + "mean_token_accuracy": 1.0, + "epoch": 1.681, + "step": 1681 + }, + { + "loss": 0.0612, + "grad_norm": 0.9250144958496094, + "learning_rate": 3.21e-06, + "num_tokens": 1154860.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.682, + "step": 1682 + }, + { + "loss": 0.066, + "grad_norm": 1.1275829076766968, + "learning_rate": 3.2000000000000003e-06, + "num_tokens": 1155884.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.683, + "step": 1683 + }, + { + "loss": 0.0382, + "grad_norm": 0.895256519317627, + "learning_rate": 3.1900000000000004e-06, + "num_tokens": 1156908.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.6840000000000002, + "step": 1684 + }, + { + "loss": 0.0542, + "grad_norm": 1.2117300033569336, + "learning_rate": 3.1800000000000005e-06, + "num_tokens": 1157511.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.685, + "step": 1685 + }, + { + "loss": 0.0574, + "grad_norm": 0.973501980304718, + "learning_rate": 3.17e-06, + "num_tokens": 1158114.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.686, + "step": 1686 + }, + { + "loss": 0.037, + "grad_norm": 0.9485671520233154, + "learning_rate": 3.1600000000000002e-06, + "num_tokens": 1158717.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.687, + "step": 1687 + }, + { + "loss": 0.0546, + "grad_norm": 0.8555501699447632, + "learning_rate": 3.1500000000000003e-06, + "num_tokens": 1159741.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.688, + "step": 1688 + }, + { + "loss": 0.0602, + "grad_norm": 1.0455832481384277, + "learning_rate": 3.1400000000000004e-06, + "num_tokens": 1160765.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.689, + "step": 1689 + }, + { + "loss": 0.033, + "grad_norm": 0.9069396257400513, + "learning_rate": 3.13e-06, + "num_tokens": 1161368.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.69, + "step": 1690 + }, + { + "loss": 0.0485, + "grad_norm": 0.9210625290870667, + "learning_rate": 3.12e-06, + "num_tokens": 1161971.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.6909999999999998, + "step": 1691 + }, + { + "loss": 0.044, + "grad_norm": 0.8520143628120422, + "learning_rate": 3.1100000000000003e-06, + "num_tokens": 1162574.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.692, + "step": 1692 + }, + { + "loss": 0.0038, + "grad_norm": 0.6605420708656311, + "learning_rate": 3.1000000000000004e-06, + "num_tokens": 1162756.0, + "mean_token_accuracy": 1.0, + "epoch": 1.693, + "step": 1693 + }, + { + "loss": 0.0492, + "grad_norm": 1.0434776544570923, + "learning_rate": 3.09e-06, + "num_tokens": 1163359.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.694, + "step": 1694 + }, + { + "loss": 0.0475, + "grad_norm": 0.8778819441795349, + "learning_rate": 3.08e-06, + "num_tokens": 1164383.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.6949999999999998, + "step": 1695 + }, + { + "loss": 0.0427, + "grad_norm": 0.8830644488334656, + "learning_rate": 3.0700000000000003e-06, + "num_tokens": 1164986.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.696, + "step": 1696 + }, + { + "loss": 0.05, + "grad_norm": 1.0579566955566406, + "learning_rate": 3.0600000000000003e-06, + "num_tokens": 1165589.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.697, + "step": 1697 + }, + { + "loss": 0.0351, + "grad_norm": 0.850786566734314, + "learning_rate": 3.05e-06, + "num_tokens": 1166192.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.698, + "step": 1698 + }, + { + "loss": 0.0451, + "grad_norm": 0.9166119694709778, + "learning_rate": 3.04e-06, + "num_tokens": 1166795.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.6989999999999998, + "step": 1699 + }, + { + "loss": 0.0046, + "grad_norm": 0.7936509847640991, + "learning_rate": 3.0300000000000002e-06, + "num_tokens": 1166977.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7, + "step": 1700 + }, + { + "loss": 0.055, + "grad_norm": 1.1245038509368896, + "learning_rate": 3.0200000000000003e-06, + "num_tokens": 1167580.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.701, + "step": 1701 + }, + { + "loss": 0.0496, + "grad_norm": 0.7564581632614136, + "learning_rate": 3.01e-06, + "num_tokens": 1168604.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.702, + "step": 1702 + }, + { + "loss": 0.048, + "grad_norm": 0.9736590385437012, + "learning_rate": 3e-06, + "num_tokens": 1169207.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.7029999999999998, + "step": 1703 + }, + { + "loss": 0.0324, + "grad_norm": 0.7254967093467712, + "learning_rate": 2.99e-06, + "num_tokens": 1169810.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.704, + "step": 1704 + }, + { + "loss": 0.0048, + "grad_norm": 0.8456124663352966, + "learning_rate": 2.9800000000000003e-06, + "num_tokens": 1169992.0, + "mean_token_accuracy": 1.0, + "epoch": 1.705, + "step": 1705 + }, + { + "loss": 0.0044, + "grad_norm": 0.7698477506637573, + "learning_rate": 2.97e-06, + "num_tokens": 1170174.0, + "mean_token_accuracy": 1.0, + "epoch": 1.706, + "step": 1706 + }, + { + "loss": 0.0048, + "grad_norm": 0.8261660933494568, + "learning_rate": 2.96e-06, + "num_tokens": 1170356.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7069999999999999, + "step": 1707 + }, + { + "loss": 0.0336, + "grad_norm": 0.8241095542907715, + "learning_rate": 2.95e-06, + "num_tokens": 1170959.0, + "mean_token_accuracy": 0.9900166392326355, + "epoch": 1.708, + "step": 1708 + }, + { + "loss": 0.0476, + "grad_norm": 0.7233520746231079, + "learning_rate": 2.9400000000000002e-06, + "num_tokens": 1171983.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.709, + "step": 1709 + }, + { + "loss": 0.0462, + "grad_norm": 0.8334800004959106, + "learning_rate": 2.93e-06, + "num_tokens": 1172586.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.71, + "step": 1710 + }, + { + "loss": 0.0545, + "grad_norm": 0.702858030796051, + "learning_rate": 2.92e-06, + "num_tokens": 1173610.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.7109999999999999, + "step": 1711 + }, + { + "loss": 0.0502, + "grad_norm": 0.9014273285865784, + "learning_rate": 2.91e-06, + "num_tokens": 1174634.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.712, + "step": 1712 + }, + { + "loss": 0.05, + "grad_norm": 0.892711877822876, + "learning_rate": 2.9e-06, + "num_tokens": 1175237.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.713, + "step": 1713 + }, + { + "loss": 0.0548, + "grad_norm": 1.1328569650650024, + "learning_rate": 2.89e-06, + "num_tokens": 1175840.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.714, + "step": 1714 + }, + { + "loss": 0.004, + "grad_norm": 0.7089178562164307, + "learning_rate": 2.88e-06, + "num_tokens": 1176022.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7149999999999999, + "step": 1715 + }, + { + "loss": 0.0443, + "grad_norm": 0.9402340054512024, + "learning_rate": 2.87e-06, + "num_tokens": 1176625.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.716, + "step": 1716 + }, + { + "loss": 0.0356, + "grad_norm": 0.7975518703460693, + "learning_rate": 2.86e-06, + "num_tokens": 1177228.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.717, + "step": 1717 + }, + { + "loss": 0.0459, + "grad_norm": 0.7821065187454224, + "learning_rate": 2.85e-06, + "num_tokens": 1177831.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.718, + "step": 1718 + }, + { + "loss": 0.0554, + "grad_norm": 1.1063010692596436, + "learning_rate": 2.84e-06, + "num_tokens": 1178855.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.7189999999999999, + "step": 1719 + }, + { + "loss": 0.0586, + "grad_norm": 0.9329798817634583, + "learning_rate": 2.83e-06, + "num_tokens": 1179879.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.72, + "step": 1720 + }, + { + "loss": 0.0518, + "grad_norm": 0.8736408352851868, + "learning_rate": 2.82e-06, + "num_tokens": 1180903.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.721, + "step": 1721 + }, + { + "loss": 0.0346, + "grad_norm": 0.8308598399162292, + "learning_rate": 2.8100000000000006e-06, + "num_tokens": 1181506.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.722, + "step": 1722 + }, + { + "loss": 0.0577, + "grad_norm": 1.303083062171936, + "learning_rate": 2.8000000000000003e-06, + "num_tokens": 1182109.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.7229999999999999, + "step": 1723 + }, + { + "loss": 0.0051, + "grad_norm": 0.873818576335907, + "learning_rate": 2.7900000000000004e-06, + "num_tokens": 1182291.0, + "mean_token_accuracy": 1.0, + "epoch": 1.724, + "step": 1724 + }, + { + "loss": 0.0054, + "grad_norm": 0.9341294765472412, + "learning_rate": 2.7800000000000005e-06, + "num_tokens": 1182473.0, + "mean_token_accuracy": 1.0, + "epoch": 1.725, + "step": 1725 + }, + { + "loss": 0.0471, + "grad_norm": 0.8815944790840149, + "learning_rate": 2.7700000000000006e-06, + "num_tokens": 1183076.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.726, + "step": 1726 + }, + { + "loss": 0.0457, + "grad_norm": 0.9239593148231506, + "learning_rate": 2.7600000000000003e-06, + "num_tokens": 1184100.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.7269999999999999, + "step": 1727 + }, + { + "loss": 0.0048, + "grad_norm": 0.8393141031265259, + "learning_rate": 2.7500000000000004e-06, + "num_tokens": 1184282.0, + "mean_token_accuracy": 1.0, + "epoch": 1.728, + "step": 1728 + }, + { + "loss": 0.0463, + "grad_norm": 0.9265674352645874, + "learning_rate": 2.7400000000000004e-06, + "num_tokens": 1184885.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.729, + "step": 1729 + }, + { + "loss": 0.033, + "grad_norm": 0.7537205815315247, + "learning_rate": 2.7300000000000005e-06, + "num_tokens": 1185488.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.73, + "step": 1730 + }, + { + "loss": 0.005, + "grad_norm": 0.8731275796890259, + "learning_rate": 2.7200000000000002e-06, + "num_tokens": 1185670.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7309999999999999, + "step": 1731 + }, + { + "loss": 0.0621, + "grad_norm": 0.9686384201049805, + "learning_rate": 2.7100000000000003e-06, + "num_tokens": 1186694.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.732, + "step": 1732 + }, + { + "loss": 0.0308, + "grad_norm": 0.754749596118927, + "learning_rate": 2.7000000000000004e-06, + "num_tokens": 1187297.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.733, + "step": 1733 + }, + { + "loss": 0.0046, + "grad_norm": 0.8170429468154907, + "learning_rate": 2.6900000000000005e-06, + "num_tokens": 1187479.0, + "mean_token_accuracy": 1.0, + "epoch": 1.734, + "step": 1734 + }, + { + "loss": 0.0479, + "grad_norm": 0.8735800981521606, + "learning_rate": 2.68e-06, + "num_tokens": 1188503.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.7349999999999999, + "step": 1735 + }, + { + "loss": 0.0585, + "grad_norm": 1.3467590808868408, + "learning_rate": 2.6700000000000003e-06, + "num_tokens": 1189106.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.736, + "step": 1736 + }, + { + "loss": 0.0533, + "grad_norm": 0.8141427636146545, + "learning_rate": 2.6600000000000004e-06, + "num_tokens": 1189709.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.737, + "step": 1737 + }, + { + "loss": 0.0552, + "grad_norm": 0.8551588654518127, + "learning_rate": 2.6500000000000005e-06, + "num_tokens": 1190733.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.738, + "step": 1738 + }, + { + "loss": 0.0333, + "grad_norm": 0.7597099542617798, + "learning_rate": 2.64e-06, + "num_tokens": 1191336.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7389999999999999, + "step": 1739 + }, + { + "loss": 0.0044, + "grad_norm": 0.7741936445236206, + "learning_rate": 2.6300000000000002e-06, + "num_tokens": 1191518.0, + "mean_token_accuracy": 1.0, + "epoch": 1.74, + "step": 1740 + }, + { + "loss": 0.0582, + "grad_norm": 0.7289506196975708, + "learning_rate": 2.6200000000000003e-06, + "num_tokens": 1192542.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.741, + "step": 1741 + }, + { + "loss": 0.0516, + "grad_norm": 1.0435099601745605, + "learning_rate": 2.6100000000000004e-06, + "num_tokens": 1193566.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.742, + "step": 1742 + }, + { + "loss": 0.0563, + "grad_norm": 0.9215458035469055, + "learning_rate": 2.6e-06, + "num_tokens": 1194590.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.7429999999999999, + "step": 1743 + }, + { + "loss": 0.0383, + "grad_norm": 0.7490559816360474, + "learning_rate": 2.59e-06, + "num_tokens": 1195614.0, + "mean_token_accuracy": 0.9843444228172302, + "epoch": 1.744, + "step": 1744 + }, + { + "loss": 0.0529, + "grad_norm": 0.8243502378463745, + "learning_rate": 2.5800000000000003e-06, + "num_tokens": 1196217.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.745, + "step": 1745 + }, + { + "loss": 0.0614, + "grad_norm": 0.9065500497817993, + "learning_rate": 2.5700000000000004e-06, + "num_tokens": 1197241.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.746, + "step": 1746 + }, + { + "loss": 0.0316, + "grad_norm": 0.7572464346885681, + "learning_rate": 2.56e-06, + "num_tokens": 1197844.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.7469999999999999, + "step": 1747 + }, + { + "loss": 0.048, + "grad_norm": 0.7955116033554077, + "learning_rate": 2.55e-06, + "num_tokens": 1198868.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.748, + "step": 1748 + }, + { + "loss": 0.0809, + "grad_norm": 2.686805248260498, + "learning_rate": 2.5400000000000002e-06, + "num_tokens": 1199471.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.749, + "step": 1749 + }, + { + "loss": 0.0316, + "grad_norm": 0.7225703597068787, + "learning_rate": 2.5300000000000003e-06, + "num_tokens": 1200074.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.75, + "step": 1750 + }, + { + "loss": 0.0336, + "grad_norm": 0.7847139239311218, + "learning_rate": 2.52e-06, + "num_tokens": 1200677.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.751, + "step": 1751 + }, + { + "loss": 0.0532, + "grad_norm": 0.905462384223938, + "learning_rate": 2.51e-06, + "num_tokens": 1201701.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.752, + "step": 1752 + }, + { + "loss": 0.0058, + "grad_norm": 1.000243902206421, + "learning_rate": 2.5e-06, + "num_tokens": 1201883.0, + "mean_token_accuracy": 1.0, + "epoch": 1.7530000000000001, + "step": 1753 + }, + { + "loss": 0.0437, + "grad_norm": 0.7757262587547302, + "learning_rate": 2.4900000000000003e-06, + "num_tokens": 1202486.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.754, + "step": 1754 + }, + { + "loss": 0.0061, + "grad_norm": 1.0458347797393799, + "learning_rate": 2.4800000000000004e-06, + "num_tokens": 1202668.0, + "mean_token_accuracy": 1.0, + "epoch": 1.755, + "step": 1755 + }, + { + "loss": 0.0504, + "grad_norm": 0.8413608074188232, + "learning_rate": 2.47e-06, + "num_tokens": 1203692.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.756, + "step": 1756 + }, + { + "loss": 0.0522, + "grad_norm": 1.0522884130477905, + "learning_rate": 2.46e-06, + "num_tokens": 1204295.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.7570000000000001, + "step": 1757 + }, + { + "loss": 0.0393, + "grad_norm": 0.6745458841323853, + "learning_rate": 2.4500000000000003e-06, + "num_tokens": 1205319.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.758, + "step": 1758 + }, + { + "loss": 0.0585, + "grad_norm": 0.7667430639266968, + "learning_rate": 2.4400000000000004e-06, + "num_tokens": 1206343.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.759, + "step": 1759 + }, + { + "loss": 0.0505, + "grad_norm": 0.9792746901512146, + "learning_rate": 2.43e-06, + "num_tokens": 1206946.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.76, + "step": 1760 + }, + { + "loss": 0.0551, + "grad_norm": 0.7983967661857605, + "learning_rate": 2.42e-06, + "num_tokens": 1207970.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.7610000000000001, + "step": 1761 + }, + { + "loss": 0.0564, + "grad_norm": 0.7570465207099915, + "learning_rate": 2.4100000000000002e-06, + "num_tokens": 1208994.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.762, + "step": 1762 + }, + { + "loss": 0.043, + "grad_norm": 0.814797043800354, + "learning_rate": 2.4000000000000003e-06, + "num_tokens": 1209597.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.763, + "step": 1763 + }, + { + "loss": 0.0488, + "grad_norm": 0.7885193228721619, + "learning_rate": 2.39e-06, + "num_tokens": 1210621.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.764, + "step": 1764 + }, + { + "loss": 0.0344, + "grad_norm": 0.818915843963623, + "learning_rate": 2.38e-06, + "num_tokens": 1211224.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7650000000000001, + "step": 1765 + }, + { + "loss": 0.0604, + "grad_norm": 0.9282973408699036, + "learning_rate": 2.37e-06, + "num_tokens": 1212248.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.766, + "step": 1766 + }, + { + "loss": 0.0404, + "grad_norm": 0.7900825142860413, + "learning_rate": 2.3600000000000003e-06, + "num_tokens": 1212851.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.767, + "step": 1767 + }, + { + "loss": 0.031, + "grad_norm": 0.7015290260314941, + "learning_rate": 2.35e-06, + "num_tokens": 1213454.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.768, + "step": 1768 + }, + { + "loss": 0.0364, + "grad_norm": 0.9064289927482605, + "learning_rate": 2.3400000000000005e-06, + "num_tokens": 1214057.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7690000000000001, + "step": 1769 + }, + { + "loss": 0.0466, + "grad_norm": 0.9048400521278381, + "learning_rate": 2.33e-06, + "num_tokens": 1215081.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.77, + "step": 1770 + }, + { + "loss": 0.0301, + "grad_norm": 0.7496972680091858, + "learning_rate": 2.3200000000000002e-06, + "num_tokens": 1215684.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.771, + "step": 1771 + }, + { + "loss": 0.0493, + "grad_norm": 0.6115801930427551, + "learning_rate": 2.3100000000000003e-06, + "num_tokens": 1216708.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.772, + "step": 1772 + }, + { + "loss": 0.0304, + "grad_norm": 0.7350578308105469, + "learning_rate": 2.3000000000000004e-06, + "num_tokens": 1217311.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.7730000000000001, + "step": 1773 + }, + { + "loss": 0.0472, + "grad_norm": 1.045663833618164, + "learning_rate": 2.29e-06, + "num_tokens": 1217914.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.774, + "step": 1774 + }, + { + "loss": 0.0551, + "grad_norm": 1.1708678007125854, + "learning_rate": 2.28e-06, + "num_tokens": 1218517.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.775, + "step": 1775 + }, + { + "loss": 0.0644, + "grad_norm": 1.0152207612991333, + "learning_rate": 2.2700000000000003e-06, + "num_tokens": 1219541.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.776, + "step": 1776 + }, + { + "loss": 0.0495, + "grad_norm": 0.9661046266555786, + "learning_rate": 2.2600000000000004e-06, + "num_tokens": 1220144.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.7770000000000001, + "step": 1777 + }, + { + "loss": 0.0396, + "grad_norm": 0.8248231410980225, + "learning_rate": 2.25e-06, + "num_tokens": 1221168.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.778, + "step": 1778 + }, + { + "loss": 0.0572, + "grad_norm": 0.741680920124054, + "learning_rate": 2.24e-06, + "num_tokens": 1222192.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.779, + "step": 1779 + }, + { + "loss": 0.0445, + "grad_norm": 0.7325671911239624, + "learning_rate": 2.2300000000000002e-06, + "num_tokens": 1223216.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.78, + "step": 1780 + }, + { + "loss": 0.0317, + "grad_norm": 0.7711221575737, + "learning_rate": 2.2200000000000003e-06, + "num_tokens": 1223819.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.7810000000000001, + "step": 1781 + }, + { + "loss": 0.0527, + "grad_norm": 0.9079440236091614, + "learning_rate": 2.21e-06, + "num_tokens": 1224422.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.782, + "step": 1782 + }, + { + "loss": 0.0108, + "grad_norm": 1.6502025127410889, + "learning_rate": 2.2e-06, + "num_tokens": 1224604.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.783, + "step": 1783 + }, + { + "loss": 0.0537, + "grad_norm": 1.1283652782440186, + "learning_rate": 2.19e-06, + "num_tokens": 1225207.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.784, + "step": 1784 + }, + { + "loss": 0.0104, + "grad_norm": 1.5997681617736816, + "learning_rate": 2.1800000000000003e-06, + "num_tokens": 1225389.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.7850000000000001, + "step": 1785 + }, + { + "loss": 0.0365, + "grad_norm": 0.6672436594963074, + "learning_rate": 2.17e-06, + "num_tokens": 1226413.0, + "mean_token_accuracy": 0.9863013625144958, + "epoch": 1.786, + "step": 1786 + }, + { + "loss": 0.0506, + "grad_norm": 0.9749234318733215, + "learning_rate": 2.16e-06, + "num_tokens": 1227016.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.787, + "step": 1787 + }, + { + "loss": 0.0491, + "grad_norm": 0.6571372747421265, + "learning_rate": 2.15e-06, + "num_tokens": 1228040.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.788, + "step": 1788 + }, + { + "loss": 0.067, + "grad_norm": 1.2986317873001099, + "learning_rate": 2.1400000000000003e-06, + "num_tokens": 1229064.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.7890000000000001, + "step": 1789 + }, + { + "loss": 0.053, + "grad_norm": 1.0465713739395142, + "learning_rate": 2.13e-06, + "num_tokens": 1229667.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.79, + "step": 1790 + }, + { + "loss": 0.053, + "grad_norm": 0.8406110405921936, + "learning_rate": 2.12e-06, + "num_tokens": 1230691.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.791, + "step": 1791 + }, + { + "loss": 0.0093, + "grad_norm": 1.4866935014724731, + "learning_rate": 2.11e-06, + "num_tokens": 1230873.0, + "mean_token_accuracy": 0.9888888597488403, + "epoch": 1.792, + "step": 1792 + }, + { + "loss": 0.0611, + "grad_norm": 0.9989224076271057, + "learning_rate": 2.1000000000000002e-06, + "num_tokens": 1231897.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.7930000000000001, + "step": 1793 + }, + { + "loss": 0.0448, + "grad_norm": 0.6616271734237671, + "learning_rate": 2.09e-06, + "num_tokens": 1232921.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.794, + "step": 1794 + }, + { + "loss": 0.0475, + "grad_norm": 0.9157487750053406, + "learning_rate": 2.08e-06, + "num_tokens": 1233524.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.795, + "step": 1795 + }, + { + "loss": 0.0084, + "grad_norm": 1.3727267980575562, + "learning_rate": 2.07e-06, + "num_tokens": 1233706.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.796, + "step": 1796 + }, + { + "loss": 0.0488, + "grad_norm": 1.0055174827575684, + "learning_rate": 2.06e-06, + "num_tokens": 1234309.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.7970000000000002, + "step": 1797 + }, + { + "loss": 0.0566, + "grad_norm": 0.8666424751281738, + "learning_rate": 2.05e-06, + "num_tokens": 1235333.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.798, + "step": 1798 + }, + { + "loss": 0.0531, + "grad_norm": 0.8747699856758118, + "learning_rate": 2.04e-06, + "num_tokens": 1236357.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.799, + "step": 1799 + }, + { + "loss": 0.0358, + "grad_norm": 0.8999316692352295, + "learning_rate": 2.0300000000000005e-06, + "num_tokens": 1236960.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.8, + "step": 1800 + }, + { + "loss": 0.0472, + "grad_norm": 1.0433317422866821, + "learning_rate": 2.02e-06, + "num_tokens": 1237563.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.8010000000000002, + "step": 1801 + }, + { + "loss": 0.0359, + "grad_norm": 0.8629103899002075, + "learning_rate": 2.0100000000000002e-06, + "num_tokens": 1238166.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.802, + "step": 1802 + }, + { + "loss": 0.0546, + "grad_norm": 1.0378329753875732, + "learning_rate": 2.0000000000000003e-06, + "num_tokens": 1238769.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.803, + "step": 1803 + }, + { + "loss": 0.032, + "grad_norm": 0.7883849143981934, + "learning_rate": 1.9900000000000004e-06, + "num_tokens": 1239372.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.804, + "step": 1804 + }, + { + "loss": 0.0287, + "grad_norm": 0.735058069229126, + "learning_rate": 1.98e-06, + "num_tokens": 1239975.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.8050000000000002, + "step": 1805 + }, + { + "loss": 0.0388, + "grad_norm": 0.8934848308563232, + "learning_rate": 1.97e-06, + "num_tokens": 1240578.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.806, + "step": 1806 + }, + { + "loss": 0.0495, + "grad_norm": 1.1365348100662231, + "learning_rate": 1.9600000000000003e-06, + "num_tokens": 1241181.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.807, + "step": 1807 + }, + { + "loss": 0.0591, + "grad_norm": 0.8974589705467224, + "learning_rate": 1.9500000000000004e-06, + "num_tokens": 1242205.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.808, + "step": 1808 + }, + { + "loss": 0.035, + "grad_norm": 0.7894022464752197, + "learning_rate": 1.94e-06, + "num_tokens": 1242808.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.8090000000000002, + "step": 1809 + }, + { + "loss": 0.0923, + "grad_norm": 3.20685076713562, + "learning_rate": 1.93e-06, + "num_tokens": 1243411.0, + "mean_token_accuracy": 0.960066556930542, + "epoch": 1.81, + "step": 1810 + }, + { + "loss": 0.048, + "grad_norm": 0.9050451517105103, + "learning_rate": 1.9200000000000003e-06, + "num_tokens": 1244014.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.811, + "step": 1811 + }, + { + "loss": 0.0519, + "grad_norm": 1.2017446756362915, + "learning_rate": 1.9100000000000003e-06, + "num_tokens": 1245038.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.812, + "step": 1812 + }, + { + "loss": 0.0525, + "grad_norm": 0.616727888584137, + "learning_rate": 1.9000000000000002e-06, + "num_tokens": 1246062.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.813, + "step": 1813 + }, + { + "loss": 0.0459, + "grad_norm": 0.8932090401649475, + "learning_rate": 1.8900000000000001e-06, + "num_tokens": 1247086.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.814, + "step": 1814 + }, + { + "loss": 0.0083, + "grad_norm": 1.3748656511306763, + "learning_rate": 1.8800000000000002e-06, + "num_tokens": 1247268.0, + "mean_token_accuracy": 1.0, + "epoch": 1.815, + "step": 1815 + }, + { + "loss": 0.0622, + "grad_norm": 0.8398600816726685, + "learning_rate": 1.87e-06, + "num_tokens": 1248292.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.8159999999999998, + "step": 1816 + }, + { + "loss": 0.0454, + "grad_norm": 0.941429078578949, + "learning_rate": 1.8600000000000002e-06, + "num_tokens": 1248895.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.817, + "step": 1817 + }, + { + "loss": 0.0083, + "grad_norm": 1.3848148584365845, + "learning_rate": 1.85e-06, + "num_tokens": 1249077.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.818, + "step": 1818 + }, + { + "loss": 0.0342, + "grad_norm": 0.9025738835334778, + "learning_rate": 1.8400000000000002e-06, + "num_tokens": 1249680.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.819, + "step": 1819 + }, + { + "loss": 0.0355, + "grad_norm": 0.6912959814071655, + "learning_rate": 1.83e-06, + "num_tokens": 1250704.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.8199999999999998, + "step": 1820 + }, + { + "loss": 0.0515, + "grad_norm": 0.7383629679679871, + "learning_rate": 1.8200000000000002e-06, + "num_tokens": 1251728.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.821, + "step": 1821 + }, + { + "loss": 0.0454, + "grad_norm": 0.6471507549285889, + "learning_rate": 1.81e-06, + "num_tokens": 1252752.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.822, + "step": 1822 + }, + { + "loss": 0.0457, + "grad_norm": 0.8248931169509888, + "learning_rate": 1.8000000000000001e-06, + "num_tokens": 1253355.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.823, + "step": 1823 + }, + { + "loss": 0.0519, + "grad_norm": 0.949046790599823, + "learning_rate": 1.79e-06, + "num_tokens": 1254379.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.8239999999999998, + "step": 1824 + }, + { + "loss": 0.0581, + "grad_norm": 1.1707154512405396, + "learning_rate": 1.7800000000000001e-06, + "num_tokens": 1254982.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.825, + "step": 1825 + }, + { + "loss": 0.0483, + "grad_norm": 0.7052024006843567, + "learning_rate": 1.77e-06, + "num_tokens": 1256006.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.826, + "step": 1826 + }, + { + "loss": 0.0443, + "grad_norm": 0.8777363896369934, + "learning_rate": 1.76e-06, + "num_tokens": 1256609.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.827, + "step": 1827 + }, + { + "loss": 0.0083, + "grad_norm": 1.3815189599990845, + "learning_rate": 1.75e-06, + "num_tokens": 1256791.0, + "mean_token_accuracy": 1.0, + "epoch": 1.8279999999999998, + "step": 1828 + }, + { + "loss": 0.0377, + "grad_norm": 0.7194532155990601, + "learning_rate": 1.74e-06, + "num_tokens": 1257815.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.829, + "step": 1829 + }, + { + "loss": 0.046, + "grad_norm": 0.9212157130241394, + "learning_rate": 1.73e-06, + "num_tokens": 1258839.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.83, + "step": 1830 + }, + { + "loss": 0.0528, + "grad_norm": 0.8202394247055054, + "learning_rate": 1.72e-06, + "num_tokens": 1259863.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.831, + "step": 1831 + }, + { + "loss": 0.032, + "grad_norm": 0.8170984983444214, + "learning_rate": 1.7100000000000004e-06, + "num_tokens": 1260466.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.8319999999999999, + "step": 1832 + }, + { + "loss": 0.0567, + "grad_norm": 0.76454758644104, + "learning_rate": 1.7000000000000002e-06, + "num_tokens": 1261490.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.833, + "step": 1833 + }, + { + "loss": 0.0444, + "grad_norm": 0.8616076111793518, + "learning_rate": 1.6900000000000003e-06, + "num_tokens": 1262093.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.834, + "step": 1834 + }, + { + "loss": 0.0598, + "grad_norm": 1.2619731426239014, + "learning_rate": 1.6800000000000002e-06, + "num_tokens": 1262696.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.835, + "step": 1835 + }, + { + "loss": 0.0579, + "grad_norm": 0.8180704116821289, + "learning_rate": 1.6700000000000003e-06, + "num_tokens": 1263720.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8359999999999999, + "step": 1836 + }, + { + "loss": 0.0443, + "grad_norm": 0.8013731241226196, + "learning_rate": 1.6600000000000002e-06, + "num_tokens": 1264323.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.837, + "step": 1837 + }, + { + "loss": 0.0459, + "grad_norm": 0.6007160544395447, + "learning_rate": 1.6500000000000003e-06, + "num_tokens": 1265347.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.838, + "step": 1838 + }, + { + "loss": 0.0081, + "grad_norm": 1.3501945734024048, + "learning_rate": 1.6400000000000002e-06, + "num_tokens": 1265529.0, + "mean_token_accuracy": 1.0, + "epoch": 1.839, + "step": 1839 + }, + { + "loss": 0.0577, + "grad_norm": 1.0602728128433228, + "learning_rate": 1.6300000000000003e-06, + "num_tokens": 1266132.0, + "mean_token_accuracy": 0.9683859944343567, + "epoch": 1.8399999999999999, + "step": 1840 + }, + { + "loss": 0.0316, + "grad_norm": 0.799614429473877, + "learning_rate": 1.6200000000000002e-06, + "num_tokens": 1266735.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.841, + "step": 1841 + }, + { + "loss": 0.0465, + "grad_norm": 1.0291104316711426, + "learning_rate": 1.6100000000000003e-06, + "num_tokens": 1267338.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.842, + "step": 1842 + }, + { + "loss": 0.0415, + "grad_norm": 0.9690372347831726, + "learning_rate": 1.6000000000000001e-06, + "num_tokens": 1267941.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.843, + "step": 1843 + }, + { + "loss": 0.0505, + "grad_norm": 0.7197061777114868, + "learning_rate": 1.5900000000000002e-06, + "num_tokens": 1268965.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.8439999999999999, + "step": 1844 + }, + { + "loss": 0.0351, + "grad_norm": 0.7125798463821411, + "learning_rate": 1.5800000000000001e-06, + "num_tokens": 1269989.0, + "mean_token_accuracy": 0.985322892665863, + "epoch": 1.845, + "step": 1845 + }, + { + "loss": 0.0087, + "grad_norm": 1.4389352798461914, + "learning_rate": 1.5700000000000002e-06, + "num_tokens": 1270171.0, + "mean_token_accuracy": 0.9944444298744202, + "epoch": 1.846, + "step": 1846 + }, + { + "loss": 0.008, + "grad_norm": 1.326840877532959, + "learning_rate": 1.56e-06, + "num_tokens": 1270353.0, + "mean_token_accuracy": 1.0, + "epoch": 1.847, + "step": 1847 + }, + { + "loss": 0.0489, + "grad_norm": 0.9269915819168091, + "learning_rate": 1.5500000000000002e-06, + "num_tokens": 1270956.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.8479999999999999, + "step": 1848 + }, + { + "loss": 0.0564, + "grad_norm": 0.826057493686676, + "learning_rate": 1.54e-06, + "num_tokens": 1271980.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.849, + "step": 1849 + }, + { + "loss": 0.0346, + "grad_norm": 0.8716343641281128, + "learning_rate": 1.5300000000000002e-06, + "num_tokens": 1272583.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.85, + "step": 1850 + }, + { + "loss": 0.0073, + "grad_norm": 1.2124102115631104, + "learning_rate": 1.52e-06, + "num_tokens": 1272765.0, + "mean_token_accuracy": 1.0, + "epoch": 1.851, + "step": 1851 + }, + { + "loss": 0.049, + "grad_norm": 0.6428321599960327, + "learning_rate": 1.5100000000000002e-06, + "num_tokens": 1273789.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8519999999999999, + "step": 1852 + }, + { + "loss": 0.0071, + "grad_norm": 1.2075852155685425, + "learning_rate": 1.5e-06, + "num_tokens": 1273971.0, + "mean_token_accuracy": 1.0, + "epoch": 1.853, + "step": 1853 + }, + { + "loss": 0.0529, + "grad_norm": 1.0347280502319336, + "learning_rate": 1.4900000000000001e-06, + "num_tokens": 1274995.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.854, + "step": 1854 + }, + { + "loss": 0.0307, + "grad_norm": 0.7036189436912537, + "learning_rate": 1.48e-06, + "num_tokens": 1275598.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.855, + "step": 1855 + }, + { + "loss": 0.0407, + "grad_norm": 1.0765986442565918, + "learning_rate": 1.4700000000000001e-06, + "num_tokens": 1276201.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.8559999999999999, + "step": 1856 + }, + { + "loss": 0.0513, + "grad_norm": 0.8049939274787903, + "learning_rate": 1.46e-06, + "num_tokens": 1277225.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.857, + "step": 1857 + }, + { + "loss": 0.0516, + "grad_norm": 0.8225579857826233, + "learning_rate": 1.45e-06, + "num_tokens": 1277828.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.858, + "step": 1858 + }, + { + "loss": 0.0069, + "grad_norm": 1.1663427352905273, + "learning_rate": 1.44e-06, + "num_tokens": 1278010.0, + "mean_token_accuracy": 1.0, + "epoch": 1.859, + "step": 1859 + }, + { + "loss": 0.0549, + "grad_norm": 0.9747959971427917, + "learning_rate": 1.43e-06, + "num_tokens": 1279034.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.8599999999999999, + "step": 1860 + }, + { + "loss": 0.057, + "grad_norm": 0.9016417860984802, + "learning_rate": 1.42e-06, + "num_tokens": 1280058.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.861, + "step": 1861 + }, + { + "loss": 0.0065, + "grad_norm": 1.1208806037902832, + "learning_rate": 1.41e-06, + "num_tokens": 1280240.0, + "mean_token_accuracy": 1.0, + "epoch": 1.862, + "step": 1862 + }, + { + "loss": 0.0347, + "grad_norm": 0.9389989972114563, + "learning_rate": 1.4000000000000001e-06, + "num_tokens": 1280843.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.863, + "step": 1863 + }, + { + "loss": 0.0457, + "grad_norm": 0.7054025530815125, + "learning_rate": 1.3900000000000002e-06, + "num_tokens": 1281867.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.8639999999999999, + "step": 1864 + }, + { + "loss": 0.0512, + "grad_norm": 0.9198103547096252, + "learning_rate": 1.3800000000000001e-06, + "num_tokens": 1282891.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.865, + "step": 1865 + }, + { + "loss": 0.0508, + "grad_norm": 0.9358418583869934, + "learning_rate": 1.3700000000000002e-06, + "num_tokens": 1283494.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.866, + "step": 1866 + }, + { + "loss": 0.0477, + "grad_norm": 0.7468611001968384, + "learning_rate": 1.3600000000000001e-06, + "num_tokens": 1284518.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.867, + "step": 1867 + }, + { + "loss": 0.043, + "grad_norm": 0.7610995769500732, + "learning_rate": 1.3500000000000002e-06, + "num_tokens": 1285542.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.8679999999999999, + "step": 1868 + }, + { + "loss": 0.0492, + "grad_norm": 0.8499964475631714, + "learning_rate": 1.34e-06, + "num_tokens": 1286566.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.869, + "step": 1869 + }, + { + "loss": 0.058, + "grad_norm": 0.7332651615142822, + "learning_rate": 1.3300000000000002e-06, + "num_tokens": 1287590.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.87, + "step": 1870 + }, + { + "loss": 0.0471, + "grad_norm": 0.8671208620071411, + "learning_rate": 1.32e-06, + "num_tokens": 1288193.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.871, + "step": 1871 + }, + { + "loss": 0.0474, + "grad_norm": 0.8300747275352478, + "learning_rate": 1.3100000000000002e-06, + "num_tokens": 1289217.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.8719999999999999, + "step": 1872 + }, + { + "loss": 0.1259, + "grad_norm": 1.9161871671676636, + "learning_rate": 1.3e-06, + "num_tokens": 1289820.0, + "mean_token_accuracy": 0.9567387700080872, + "epoch": 1.873, + "step": 1873 + }, + { + "loss": 0.0537, + "grad_norm": 1.0094809532165527, + "learning_rate": 1.2900000000000001e-06, + "num_tokens": 1290423.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.874, + "step": 1874 + }, + { + "loss": 0.0535, + "grad_norm": 0.8210059404373169, + "learning_rate": 1.28e-06, + "num_tokens": 1291026.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.875, + "step": 1875 + }, + { + "loss": 0.0063, + "grad_norm": 1.0734435319900513, + "learning_rate": 1.2700000000000001e-06, + "num_tokens": 1291208.0, + "mean_token_accuracy": 1.0, + "epoch": 1.876, + "step": 1876 + }, + { + "loss": 0.0332, + "grad_norm": 0.7847937345504761, + "learning_rate": 1.26e-06, + "num_tokens": 1291811.0, + "mean_token_accuracy": 0.9850249290466309, + "epoch": 1.877, + "step": 1877 + }, + { + "loss": 0.0618, + "grad_norm": 0.8579657077789307, + "learning_rate": 1.25e-06, + "num_tokens": 1292835.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.8780000000000001, + "step": 1878 + }, + { + "loss": 0.0547, + "grad_norm": 0.8215232491493225, + "learning_rate": 1.2400000000000002e-06, + "num_tokens": 1293859.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.879, + "step": 1879 + }, + { + "loss": 0.0317, + "grad_norm": 0.7249704599380493, + "learning_rate": 1.23e-06, + "num_tokens": 1294462.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.88, + "step": 1880 + }, + { + "loss": 0.0721, + "grad_norm": 1.369104027748108, + "learning_rate": 1.2200000000000002e-06, + "num_tokens": 1295486.0, + "mean_token_accuracy": 0.965753436088562, + "epoch": 1.881, + "step": 1881 + }, + { + "loss": 0.054, + "grad_norm": 1.2583900690078735, + "learning_rate": 1.21e-06, + "num_tokens": 1296089.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.8820000000000001, + "step": 1882 + }, + { + "loss": 0.0529, + "grad_norm": 0.9122426509857178, + "learning_rate": 1.2000000000000002e-06, + "num_tokens": 1296692.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.883, + "step": 1883 + }, + { + "loss": 0.0492, + "grad_norm": 0.7298877835273743, + "learning_rate": 1.19e-06, + "num_tokens": 1297716.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.884, + "step": 1884 + }, + { + "loss": 0.0565, + "grad_norm": 1.4061273336410522, + "learning_rate": 1.1800000000000001e-06, + "num_tokens": 1298319.0, + "mean_token_accuracy": 0.9700499176979065, + "epoch": 1.885, + "step": 1885 + }, + { + "loss": 0.0485, + "grad_norm": 0.9004549384117126, + "learning_rate": 1.1700000000000002e-06, + "num_tokens": 1299343.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.8860000000000001, + "step": 1886 + }, + { + "loss": 0.007, + "grad_norm": 1.170093059539795, + "learning_rate": 1.1600000000000001e-06, + "num_tokens": 1299525.0, + "mean_token_accuracy": 1.0, + "epoch": 1.887, + "step": 1887 + }, + { + "loss": 0.0067, + "grad_norm": 1.128398060798645, + "learning_rate": 1.1500000000000002e-06, + "num_tokens": 1299707.0, + "mean_token_accuracy": 1.0, + "epoch": 1.888, + "step": 1888 + }, + { + "loss": 0.052, + "grad_norm": 0.8170666098594666, + "learning_rate": 1.14e-06, + "num_tokens": 1300731.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.889, + "step": 1889 + }, + { + "loss": 0.0447, + "grad_norm": 0.7825000882148743, + "learning_rate": 1.1300000000000002e-06, + "num_tokens": 1301755.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.8900000000000001, + "step": 1890 + }, + { + "loss": 0.0479, + "grad_norm": 0.7074435949325562, + "learning_rate": 1.12e-06, + "num_tokens": 1302779.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.891, + "step": 1891 + }, + { + "loss": 0.0559, + "grad_norm": 1.2572802305221558, + "learning_rate": 1.1100000000000002e-06, + "num_tokens": 1303382.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.892, + "step": 1892 + }, + { + "loss": 0.0062, + "grad_norm": 1.083220362663269, + "learning_rate": 1.1e-06, + "num_tokens": 1303564.0, + "mean_token_accuracy": 1.0, + "epoch": 1.893, + "step": 1893 + }, + { + "loss": 0.0373, + "grad_norm": 1.386085867881775, + "learning_rate": 1.0900000000000002e-06, + "num_tokens": 1304167.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.8940000000000001, + "step": 1894 + }, + { + "loss": 0.0499, + "grad_norm": 0.9271661043167114, + "learning_rate": 1.08e-06, + "num_tokens": 1304770.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.895, + "step": 1895 + }, + { + "loss": 0.0069, + "grad_norm": 1.1777589321136475, + "learning_rate": 1.0700000000000001e-06, + "num_tokens": 1304952.0, + "mean_token_accuracy": 1.0, + "epoch": 1.896, + "step": 1896 + }, + { + "loss": 0.0063, + "grad_norm": 1.0855423212051392, + "learning_rate": 1.06e-06, + "num_tokens": 1305134.0, + "mean_token_accuracy": 1.0, + "epoch": 1.897, + "step": 1897 + }, + { + "loss": 0.0563, + "grad_norm": 0.6582868099212646, + "learning_rate": 1.0500000000000001e-06, + "num_tokens": 1306158.0, + "mean_token_accuracy": 0.9696673154830933, + "epoch": 1.8980000000000001, + "step": 1898 + }, + { + "loss": 0.0322, + "grad_norm": 0.929911196231842, + "learning_rate": 1.04e-06, + "num_tokens": 1306761.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.899, + "step": 1899 + }, + { + "loss": 0.0313, + "grad_norm": 0.7664781808853149, + "learning_rate": 1.03e-06, + "num_tokens": 1307364.0, + "mean_token_accuracy": 0.9883527159690857, + "epoch": 1.9, + "step": 1900 + }, + { + "loss": 0.0367, + "grad_norm": 0.8684309124946594, + "learning_rate": 1.02e-06, + "num_tokens": 1307967.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.901, + "step": 1901 + }, + { + "loss": 0.0559, + "grad_norm": 1.2534968852996826, + "learning_rate": 1.01e-06, + "num_tokens": 1308570.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9020000000000001, + "step": 1902 + }, + { + "loss": 0.0654, + "grad_norm": 1.0085036754608154, + "learning_rate": 1.0000000000000002e-06, + "num_tokens": 1309594.0, + "mean_token_accuracy": 0.9647749662399292, + "epoch": 1.903, + "step": 1903 + }, + { + "loss": 0.0055, + "grad_norm": 0.9474945068359375, + "learning_rate": 9.9e-07, + "num_tokens": 1309776.0, + "mean_token_accuracy": 1.0, + "epoch": 1.904, + "step": 1904 + }, + { + "loss": 0.0468, + "grad_norm": 0.9569233059883118, + "learning_rate": 9.800000000000001e-07, + "num_tokens": 1310800.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.905, + "step": 1905 + }, + { + "loss": 0.0344, + "grad_norm": 0.797659695148468, + "learning_rate": 9.7e-07, + "num_tokens": 1311403.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.9060000000000001, + "step": 1906 + }, + { + "loss": 0.0495, + "grad_norm": 0.9170741438865662, + "learning_rate": 9.600000000000001e-07, + "num_tokens": 1312006.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.907, + "step": 1907 + }, + { + "loss": 0.0051, + "grad_norm": 0.8878421187400818, + "learning_rate": 9.500000000000001e-07, + "num_tokens": 1312188.0, + "mean_token_accuracy": 1.0, + "epoch": 1.908, + "step": 1908 + }, + { + "loss": 0.0441, + "grad_norm": 0.9606658220291138, + "learning_rate": 9.400000000000001e-07, + "num_tokens": 1312791.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.909, + "step": 1909 + }, + { + "loss": 0.0589, + "grad_norm": 0.9086238145828247, + "learning_rate": 9.300000000000001e-07, + "num_tokens": 1313815.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.9100000000000001, + "step": 1910 + }, + { + "loss": 0.0057, + "grad_norm": 0.9700196981430054, + "learning_rate": 9.200000000000001e-07, + "num_tokens": 1313997.0, + "mean_token_accuracy": 1.0, + "epoch": 1.911, + "step": 1911 + }, + { + "loss": 0.0527, + "grad_norm": 1.117866039276123, + "learning_rate": 9.100000000000001e-07, + "num_tokens": 1314600.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.912, + "step": 1912 + }, + { + "loss": 0.0321, + "grad_norm": 0.7691379189491272, + "learning_rate": 9.000000000000001e-07, + "num_tokens": 1315203.0, + "mean_token_accuracy": 0.9866888523101807, + "epoch": 1.913, + "step": 1913 + }, + { + "loss": 0.0485, + "grad_norm": 1.0280470848083496, + "learning_rate": 8.900000000000001e-07, + "num_tokens": 1315806.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.9140000000000001, + "step": 1914 + }, + { + "loss": 0.0614, + "grad_norm": 1.213173508644104, + "learning_rate": 8.8e-07, + "num_tokens": 1316409.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.915, + "step": 1915 + }, + { + "loss": 0.0449, + "grad_norm": 0.8026267886161804, + "learning_rate": 8.7e-07, + "num_tokens": 1317433.0, + "mean_token_accuracy": 0.9794520735740662, + "epoch": 1.916, + "step": 1916 + }, + { + "loss": 0.0053, + "grad_norm": 0.9020451903343201, + "learning_rate": 8.6e-07, + "num_tokens": 1317615.0, + "mean_token_accuracy": 1.0, + "epoch": 1.917, + "step": 1917 + }, + { + "loss": 0.0465, + "grad_norm": 0.9917466044425964, + "learning_rate": 8.500000000000001e-07, + "num_tokens": 1318218.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9180000000000001, + "step": 1918 + }, + { + "loss": 0.0338, + "grad_norm": 0.8889523148536682, + "learning_rate": 8.400000000000001e-07, + "num_tokens": 1318821.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.919, + "step": 1919 + }, + { + "loss": 0.0521, + "grad_norm": 0.8119315505027771, + "learning_rate": 8.300000000000001e-07, + "num_tokens": 1319845.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.92, + "step": 1920 + }, + { + "loss": 0.0378, + "grad_norm": 0.9816769957542419, + "learning_rate": 8.200000000000001e-07, + "num_tokens": 1320448.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.921, + "step": 1921 + }, + { + "loss": 0.0613, + "grad_norm": 1.0251444578170776, + "learning_rate": 8.100000000000001e-07, + "num_tokens": 1321472.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.9220000000000002, + "step": 1922 + }, + { + "loss": 0.0345, + "grad_norm": 0.9047452211380005, + "learning_rate": 8.000000000000001e-07, + "num_tokens": 1322075.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.923, + "step": 1923 + }, + { + "loss": 0.0049, + "grad_norm": 0.8505979776382446, + "learning_rate": 7.900000000000001e-07, + "num_tokens": 1322257.0, + "mean_token_accuracy": 1.0, + "epoch": 1.924, + "step": 1924 + }, + { + "loss": 0.0397, + "grad_norm": 0.9435928463935852, + "learning_rate": 7.8e-07, + "num_tokens": 1322860.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.925, + "step": 1925 + }, + { + "loss": 0.0378, + "grad_norm": 0.8154147863388062, + "learning_rate": 7.7e-07, + "num_tokens": 1323884.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.9260000000000002, + "step": 1926 + }, + { + "loss": 0.0592, + "grad_norm": 1.2856541872024536, + "learning_rate": 7.6e-07, + "num_tokens": 1324487.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.927, + "step": 1927 + }, + { + "loss": 0.0527, + "grad_norm": 0.998885452747345, + "learning_rate": 7.5e-07, + "num_tokens": 1325090.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.928, + "step": 1928 + }, + { + "loss": 0.0418, + "grad_norm": 1.227192759513855, + "learning_rate": 7.4e-07, + "num_tokens": 1325693.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.929, + "step": 1929 + }, + { + "loss": 0.0353, + "grad_norm": 0.9215168356895447, + "learning_rate": 7.3e-07, + "num_tokens": 1326296.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9300000000000002, + "step": 1930 + }, + { + "loss": 0.0614, + "grad_norm": 0.9548213481903076, + "learning_rate": 7.2e-07, + "num_tokens": 1327320.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.931, + "step": 1931 + }, + { + "loss": 0.005, + "grad_norm": 0.8584897518157959, + "learning_rate": 7.1e-07, + "num_tokens": 1327502.0, + "mean_token_accuracy": 1.0, + "epoch": 1.932, + "step": 1932 + }, + { + "loss": 0.0521, + "grad_norm": 0.8318498134613037, + "learning_rate": 7.000000000000001e-07, + "num_tokens": 1328526.0, + "mean_token_accuracy": 0.9745596647262573, + "epoch": 1.933, + "step": 1933 + }, + { + "loss": 0.0393, + "grad_norm": 0.8967841267585754, + "learning_rate": 6.900000000000001e-07, + "num_tokens": 1329129.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.9340000000000002, + "step": 1934 + }, + { + "loss": 0.0049, + "grad_norm": 0.8509653806686401, + "learning_rate": 6.800000000000001e-07, + "num_tokens": 1329311.0, + "mean_token_accuracy": 1.0, + "epoch": 1.935, + "step": 1935 + }, + { + "loss": 0.0844, + "grad_norm": 1.9590702056884766, + "learning_rate": 6.7e-07, + "num_tokens": 1330335.0, + "mean_token_accuracy": 0.9608610272407532, + "epoch": 1.936, + "step": 1936 + }, + { + "loss": 0.0048, + "grad_norm": 0.8454121351242065, + "learning_rate": 6.6e-07, + "num_tokens": 1330517.0, + "mean_token_accuracy": 1.0, + "epoch": 1.937, + "step": 1937 + }, + { + "loss": 0.0049, + "grad_norm": 0.8549466133117676, + "learning_rate": 6.5e-07, + "num_tokens": 1330699.0, + "mean_token_accuracy": 1.0, + "epoch": 1.938, + "step": 1938 + }, + { + "loss": 0.0495, + "grad_norm": 1.1537846326828003, + "learning_rate": 6.4e-07, + "num_tokens": 1331302.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.939, + "step": 1939 + }, + { + "loss": 0.0532, + "grad_norm": 0.8321271538734436, + "learning_rate": 6.3e-07, + "num_tokens": 1332326.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.94, + "step": 1940 + }, + { + "loss": 0.0553, + "grad_norm": 0.9713524580001831, + "learning_rate": 6.200000000000001e-07, + "num_tokens": 1333350.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.9409999999999998, + "step": 1941 + }, + { + "loss": 0.047, + "grad_norm": 0.9886651635169983, + "learning_rate": 6.100000000000001e-07, + "num_tokens": 1333953.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.942, + "step": 1942 + }, + { + "loss": 0.0046, + "grad_norm": 0.8035193085670471, + "learning_rate": 6.000000000000001e-07, + "num_tokens": 1334135.0, + "mean_token_accuracy": 1.0, + "epoch": 1.943, + "step": 1943 + }, + { + "loss": 0.0528, + "grad_norm": 1.0886720418930054, + "learning_rate": 5.900000000000001e-07, + "num_tokens": 1334738.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.944, + "step": 1944 + }, + { + "loss": 0.04, + "grad_norm": 0.7274325489997864, + "learning_rate": 5.800000000000001e-07, + "num_tokens": 1335762.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9449999999999998, + "step": 1945 + }, + { + "loss": 0.0486, + "grad_norm": 0.8618095517158508, + "learning_rate": 5.7e-07, + "num_tokens": 1336786.0, + "mean_token_accuracy": 0.976516604423523, + "epoch": 1.946, + "step": 1946 + }, + { + "loss": 0.0575, + "grad_norm": 0.8250148892402649, + "learning_rate": 5.6e-07, + "num_tokens": 1337810.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.947, + "step": 1947 + }, + { + "loss": 0.0505, + "grad_norm": 0.9134087562561035, + "learning_rate": 5.5e-07, + "num_tokens": 1338834.0, + "mean_token_accuracy": 0.9774951338768005, + "epoch": 1.948, + "step": 1948 + }, + { + "loss": 0.0578, + "grad_norm": 0.9032110571861267, + "learning_rate": 5.4e-07, + "num_tokens": 1339858.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9489999999999998, + "step": 1949 + }, + { + "loss": 0.0051, + "grad_norm": 0.8683751225471497, + "learning_rate": 5.3e-07, + "num_tokens": 1340040.0, + "mean_token_accuracy": 1.0, + "epoch": 1.95, + "step": 1950 + }, + { + "loss": 0.0471, + "grad_norm": 0.9614758491516113, + "learning_rate": 5.2e-07, + "num_tokens": 1340643.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.951, + "step": 1951 + }, + { + "loss": 0.0425, + "grad_norm": 0.7443792819976807, + "learning_rate": 5.1e-07, + "num_tokens": 1341246.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.952, + "step": 1952 + }, + { + "loss": 0.0526, + "grad_norm": 0.8888201117515564, + "learning_rate": 5.000000000000001e-07, + "num_tokens": 1342270.0, + "mean_token_accuracy": 0.9706457853317261, + "epoch": 1.9529999999999998, + "step": 1953 + }, + { + "loss": 0.0315, + "grad_norm": 0.8375948667526245, + "learning_rate": 4.900000000000001e-07, + "num_tokens": 1342873.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.954, + "step": 1954 + }, + { + "loss": 0.0415, + "grad_norm": 0.8021379113197327, + "learning_rate": 4.800000000000001e-07, + "num_tokens": 1343897.0, + "mean_token_accuracy": 0.9814090132713318, + "epoch": 1.955, + "step": 1955 + }, + { + "loss": 0.0473, + "grad_norm": 0.8499237895011902, + "learning_rate": 4.7000000000000005e-07, + "num_tokens": 1344921.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.956, + "step": 1956 + }, + { + "loss": 0.0517, + "grad_norm": 1.1220508813858032, + "learning_rate": 4.6000000000000004e-07, + "num_tokens": 1345524.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9569999999999999, + "step": 1957 + }, + { + "loss": 0.0486, + "grad_norm": 0.9968160390853882, + "learning_rate": 4.5000000000000003e-07, + "num_tokens": 1346127.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.958, + "step": 1958 + }, + { + "loss": 0.0052, + "grad_norm": 0.9024248719215393, + "learning_rate": 4.4e-07, + "num_tokens": 1346309.0, + "mean_token_accuracy": 1.0, + "epoch": 1.959, + "step": 1959 + }, + { + "loss": 0.0328, + "grad_norm": 0.7692991495132446, + "learning_rate": 4.3e-07, + "num_tokens": 1346912.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.96, + "step": 1960 + }, + { + "loss": 0.05, + "grad_norm": 1.0936299562454224, + "learning_rate": 4.2000000000000006e-07, + "num_tokens": 1347515.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9609999999999999, + "step": 1961 + }, + { + "loss": 0.063, + "grad_norm": 1.1761913299560547, + "learning_rate": 4.1000000000000004e-07, + "num_tokens": 1348118.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.962, + "step": 1962 + }, + { + "loss": 0.0657, + "grad_norm": 1.1236613988876343, + "learning_rate": 4.0000000000000003e-07, + "num_tokens": 1349142.0, + "mean_token_accuracy": 0.9677103757858276, + "epoch": 1.963, + "step": 1963 + }, + { + "loss": 0.0434, + "grad_norm": 0.8958877325057983, + "learning_rate": 3.9e-07, + "num_tokens": 1350166.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.964, + "step": 1964 + }, + { + "loss": 0.0052, + "grad_norm": 0.906029462814331, + "learning_rate": 3.8e-07, + "num_tokens": 1350348.0, + "mean_token_accuracy": 1.0, + "epoch": 1.9649999999999999, + "step": 1965 + }, + { + "loss": 0.0451, + "grad_norm": 0.9595372080802917, + "learning_rate": 3.7e-07, + "num_tokens": 1350951.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.966, + "step": 1966 + }, + { + "loss": 0.0504, + "grad_norm": 0.7299979329109192, + "learning_rate": 3.6e-07, + "num_tokens": 1351975.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.967, + "step": 1967 + }, + { + "loss": 0.0435, + "grad_norm": 0.7944428324699402, + "learning_rate": 3.5000000000000004e-07, + "num_tokens": 1352578.0, + "mean_token_accuracy": 0.9783693552017212, + "epoch": 1.968, + "step": 1968 + }, + { + "loss": 0.0488, + "grad_norm": 0.6681357026100159, + "learning_rate": 3.4000000000000003e-07, + "num_tokens": 1353602.0, + "mean_token_accuracy": 0.9755381345748901, + "epoch": 1.9689999999999999, + "step": 1969 + }, + { + "loss": 0.0049, + "grad_norm": 0.874741792678833, + "learning_rate": 3.3e-07, + "num_tokens": 1353784.0, + "mean_token_accuracy": 1.0, + "epoch": 1.97, + "step": 1970 + }, + { + "loss": 0.0051, + "grad_norm": 0.8841032385826111, + "learning_rate": 3.2e-07, + "num_tokens": 1353966.0, + "mean_token_accuracy": 1.0, + "epoch": 1.971, + "step": 1971 + }, + { + "loss": 0.0371, + "grad_norm": 0.8100385665893555, + "learning_rate": 3.1000000000000005e-07, + "num_tokens": 1354990.0, + "mean_token_accuracy": 0.9823874831199646, + "epoch": 1.972, + "step": 1972 + }, + { + "loss": 0.0335, + "grad_norm": 0.737175464630127, + "learning_rate": 3.0000000000000004e-07, + "num_tokens": 1355593.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.9729999999999999, + "step": 1973 + }, + { + "loss": 0.059, + "grad_norm": 0.7973077297210693, + "learning_rate": 2.9000000000000003e-07, + "num_tokens": 1356617.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.974, + "step": 1974 + }, + { + "loss": 0.0607, + "grad_norm": 0.9615496397018433, + "learning_rate": 2.8e-07, + "num_tokens": 1357641.0, + "mean_token_accuracy": 0.9686888456344604, + "epoch": 1.975, + "step": 1975 + }, + { + "loss": 0.0519, + "grad_norm": 0.9827134609222412, + "learning_rate": 2.7e-07, + "num_tokens": 1358665.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.976, + "step": 1976 + }, + { + "loss": 0.0454, + "grad_norm": 0.7800329327583313, + "learning_rate": 2.6e-07, + "num_tokens": 1359268.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9769999999999999, + "step": 1977 + }, + { + "loss": 0.0432, + "grad_norm": 0.849504292011261, + "learning_rate": 2.5000000000000004e-07, + "num_tokens": 1359871.0, + "mean_token_accuracy": 0.981697142124176, + "epoch": 1.978, + "step": 1978 + }, + { + "loss": 0.0491, + "grad_norm": 0.753039538860321, + "learning_rate": 2.4000000000000003e-07, + "num_tokens": 1360895.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.979, + "step": 1979 + }, + { + "loss": 0.0523, + "grad_norm": 1.0666791200637817, + "learning_rate": 2.3000000000000002e-07, + "num_tokens": 1361498.0, + "mean_token_accuracy": 0.9717137813568115, + "epoch": 1.98, + "step": 1980 + }, + { + "loss": 0.0461, + "grad_norm": 0.9669170379638672, + "learning_rate": 2.2e-07, + "num_tokens": 1362101.0, + "mean_token_accuracy": 0.980033278465271, + "epoch": 1.9809999999999999, + "step": 1981 + }, + { + "loss": 0.0053, + "grad_norm": 0.9321076273918152, + "learning_rate": 2.1000000000000003e-07, + "num_tokens": 1362283.0, + "mean_token_accuracy": 1.0, + "epoch": 1.982, + "step": 1982 + }, + { + "loss": 0.0501, + "grad_norm": 1.037760615348816, + "learning_rate": 2.0000000000000002e-07, + "num_tokens": 1362886.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.983, + "step": 1983 + }, + { + "loss": 0.0471, + "grad_norm": 0.6260714530944824, + "learning_rate": 1.9e-07, + "num_tokens": 1363910.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.984, + "step": 1984 + }, + { + "loss": 0.0539, + "grad_norm": 1.0233992338180542, + "learning_rate": 1.8e-07, + "num_tokens": 1364513.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.9849999999999999, + "step": 1985 + }, + { + "loss": 0.0649, + "grad_norm": 0.9640028476715088, + "learning_rate": 1.7000000000000001e-07, + "num_tokens": 1365537.0, + "mean_token_accuracy": 0.9716242551803589, + "epoch": 1.986, + "step": 1986 + }, + { + "loss": 0.0051, + "grad_norm": 0.877005398273468, + "learning_rate": 1.6e-07, + "num_tokens": 1365719.0, + "mean_token_accuracy": 1.0, + "epoch": 1.987, + "step": 1987 + }, + { + "loss": 0.0556, + "grad_norm": 0.7788808345794678, + "learning_rate": 1.5000000000000002e-07, + "num_tokens": 1366743.0, + "mean_token_accuracy": 0.9735811948776245, + "epoch": 1.988, + "step": 1988 + }, + { + "loss": 0.0483, + "grad_norm": 0.9708361625671387, + "learning_rate": 1.4e-07, + "num_tokens": 1367346.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9889999999999999, + "step": 1989 + }, + { + "loss": 0.0522, + "grad_norm": 0.7852795124053955, + "learning_rate": 1.3e-07, + "num_tokens": 1368370.0, + "mean_token_accuracy": 0.9726027250289917, + "epoch": 1.99, + "step": 1990 + }, + { + "loss": 0.0335, + "grad_norm": 0.8945266604423523, + "learning_rate": 1.2000000000000002e-07, + "num_tokens": 1368973.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.991, + "step": 1991 + }, + { + "loss": 0.0511, + "grad_norm": 0.971626877784729, + "learning_rate": 1.1e-07, + "num_tokens": 1369576.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 1.992, + "step": 1992 + }, + { + "loss": 0.0461, + "grad_norm": 0.7959609031677246, + "learning_rate": 1.0000000000000001e-07, + "num_tokens": 1370179.0, + "mean_token_accuracy": 0.9767054915428162, + "epoch": 1.9929999999999999, + "step": 1993 + }, + { + "loss": 0.0047, + "grad_norm": 0.820395827293396, + "learning_rate": 9e-08, + "num_tokens": 1370361.0, + "mean_token_accuracy": 1.0, + "epoch": 1.994, + "step": 1994 + }, + { + "loss": 0.0509, + "grad_norm": 0.869403064250946, + "learning_rate": 8e-08, + "num_tokens": 1370964.0, + "mean_token_accuracy": 0.9733777046203613, + "epoch": 1.995, + "step": 1995 + }, + { + "loss": 0.0363, + "grad_norm": 0.882118821144104, + "learning_rate": 7e-08, + "num_tokens": 1371988.0, + "mean_token_accuracy": 0.980430543422699, + "epoch": 1.996, + "step": 1996 + }, + { + "loss": 0.0375, + "grad_norm": 0.7351768016815186, + "learning_rate": 6.000000000000001e-08, + "num_tokens": 1373012.0, + "mean_token_accuracy": 0.9784736037254333, + "epoch": 1.9969999999999999, + "step": 1997 + }, + { + "loss": 0.0053, + "grad_norm": 0.9105353355407715, + "learning_rate": 5.0000000000000004e-08, + "num_tokens": 1373194.0, + "mean_token_accuracy": 1.0, + "epoch": 1.998, + "step": 1998 + }, + { + "loss": 0.0325, + "grad_norm": 0.792142927646637, + "learning_rate": 4e-08, + "num_tokens": 1373797.0, + "mean_token_accuracy": 0.9833610653877258, + "epoch": 1.999, + "step": 1999 + }, + { + "loss": 0.054, + "grad_norm": 1.1374331712722778, + "learning_rate": 3.0000000000000004e-08, + "num_tokens": 1374400.0, + "mean_token_accuracy": 0.9750415682792664, + "epoch": 2.0, + "step": 2000 + }, + { + "train_runtime": 715.2908, + "train_samples_per_second": 5.592, + "train_steps_per_second": 2.796, + "total_flos": 2.949554402500608e+16, + "train_loss": 0.15688225453009363, + "epoch": 2.0, + "step": 2000 + } +] \ No newline at end of file diff --git a/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json new file mode 100644 index 0000000000000000000000000000000000000000..e49c30bdde3d50be652809e01980974b13691c98 --- /dev/null +++ b/docs/results/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json @@ -0,0 +1,18 @@ +{ + "status": "ok", + "backend": "trl_transformers", + "examples_used": 2000, + "model_id": "Qwen/Qwen2.5-3B-Instruct", + "unsloth_available": false, + "train_runtime": 715.2908, + "train_loss": 0.15688225453009363, + "train_metrics": { + "train_runtime": 715.2908, + "train_samples_per_second": 5.592, + "train_steps_per_second": 2.796, + "total_flos": 2.949554402500608e+16, + "train_loss": 0.15688225453009363 + }, + "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", + "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter" +} \ No newline at end of file diff --git a/docs/results/train_holdout_gap.png b/docs/results/train_holdout_gap.png new file mode 100644 index 0000000000000000000000000000000000000000..cc5337749b8e32c3bef07a631b7b2d54b944c407 Binary files /dev/null and b/docs/results/train_holdout_gap.png differ